From a37ec31f5dafe252b358bf667653b0e9f3131c2c Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Wed, 27 Aug 2025 09:10:05 -0700 Subject: [PATCH 01/25] Added local docker deployment --- Makefile | 106 +++++++++++++++++++++++++++++++++++++++++++++ build-local.sh | 50 +++++++++++++++++++++ docker-compose.yml | 52 ++++++++++++++++++++++ run-local.sh | 58 +++++++++++++++++++++++++ 4 files changed, 266 insertions(+) create mode 100644 Makefile create mode 100755 build-local.sh create mode 100644 docker-compose.yml create mode 100755 run-local.sh diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9fdfcde --- /dev/null +++ b/Makefile @@ -0,0 +1,106 @@ +# Makefile for kernel-browser local development +# Using kernel-images native build system + +.PHONY: help build run stop logs clean dev status shell test + +# Default target +help: ## Show this help message + @echo "Kernel Browser - Local Development (using kernel-images build system)" + @echo "==================================================================" + @echo "" + @echo "Available commands:" + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " %-15s %s\n", $$1, $$2}' + +init: ## Initialize submodules (run this first) + git submodule update --init --recursive + @echo "โœ… Submodules initialized" + +build: init ## Build using kernel-images build system + @echo "๐Ÿ”จ Building with kernel-images native build system..." + ./build-local.sh + @echo "โœ… Build complete" + +run: ## Run using kernel-images run system (interactive) + @echo "๐Ÿš€ Starting kernel-browser using native run script..." + ./run-local.sh + +compose-up: build ## Start with docker-compose (background) + @echo "๐Ÿš€ Starting with docker-compose..." 
+ docker-compose up -d + @$(MAKE) --no-print-directory info + @echo "" + @echo "๐Ÿ“Š View logs with: make logs" + +compose-dev: build ## Start with docker-compose (foreground with logs) + @echo "๐Ÿš€ Starting with docker-compose in development mode..." + docker-compose up + +dev: compose-dev ## Alias for compose-dev + +stop: ## Stop all containers + @echo "๐Ÿ›‘ Stopping containers..." + docker-compose down + docker stop kernel-browser-local 2>/dev/null || true + docker rm kernel-browser-local 2>/dev/null || true + @echo "โœ… Containers stopped" + +restart: ## Restart containers + @$(MAKE) --no-print-directory stop + @$(MAKE) --no-print-directory compose-up + +logs: ## Show container logs + docker-compose logs -f kernel-browser || docker logs -f kernel-browser-local + +status: ## Show container status + @echo "Docker Compose Status:" + @docker-compose ps || true + @echo "" + @echo "Direct Container Status:" + @docker ps --filter name=kernel-browser + +shell: ## Get shell access to running container + docker exec -it kernel-browser-local bash || docker-compose exec kernel-browser bash + +info: ## Show connection information + @echo "" + @echo "๐ŸŒ Service Access Points:" + @echo " WebRTC Client: http://localhost:8080" + @echo " Chrome DevTools: http://localhost:9222/json" + @echo " Recording API: http://localhost:444/api" + @echo " Health Check: http://localhost:8080/" + +test: ## Test service endpoints + @echo "๐Ÿงช Testing service endpoints..." + @echo -n "WebRTC Client (8080): " + @curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/ || echo "Failed to connect" + @echo "" + @echo -n "Chrome DevTools (9222): " + @curl -s -o /dev/null -w "%{http_code}" http://localhost:9222/json/version || echo "Failed to connect" + @echo "" + @echo -n "Recording API (444): " + @curl -s -o /dev/null -w "%{http_code}" http://localhost:444/ && echo " (404 is normal - API is running)" || echo "Failed to connect" + @echo "" + @echo "๐ŸŽฏ All services are ready! 
Access points:" + @echo " WebRTC Client: http://localhost:8080" + @echo " Chrome DevTools: http://localhost:9222/json" + +clean: stop ## Clean up everything + @echo "๐Ÿงน Cleaning up..." + docker-compose down -v 2>/dev/null || true + docker rmi kernel-browser:local 2>/dev/null || true + docker system prune -f + rm -rf recordings/* 2>/dev/null || true + rm -rf kernel-images/images/chromium-headful/.tmp 2>/dev/null || true + @echo "โœ… Cleanup complete" + +# Alternative commands for different approaches +native-build: init ## Build using kernel-images native script directly + cd kernel-images/images/chromium-headful && \ + UKC_TOKEN=dummy-token UKC_METRO=dummy-metro IMAGE=kernel-browser:local ./build-docker.sh + +native-run: ## Run using kernel-images native script directly + cd kernel-images/images/chromium-headful && \ + UKC_TOKEN=dummy-token UKC_METRO=dummy-metro IMAGE=kernel-browser:local NAME=kernel-browser-local ENABLE_WEBRTC=true ./run-docker.sh + +# Quick development workflow +quick: init build compose-up test ## Quick setup: init + build + run + test \ No newline at end of file diff --git a/build-local.sh b/build-local.sh new file mode 100755 index 0000000..975196e --- /dev/null +++ b/build-local.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +# Local build wrapper for kernel-images chromium-headful +set -e -o pipefail + +echo "๐Ÿ”จ Building kernel-browser using kernel-images build system..." + +# Ensure we're in the right directory +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +cd "$SCRIPT_DIR" + +# Check if kernel-images submodule exists and is initialized +if [ ! -d "kernel-images" ]; then + echo "โŒ Error: kernel-images submodule not found" + echo " Run: git submodule update --init --recursive" + exit 1 +fi + +if [ ! 
-f "kernel-images/images/chromium-headful/build-docker.sh" ]; then + echo "โŒ Error: kernel-images submodule appears empty" + echo " Run: git submodule update --init --recursive" + exit 1 +fi + +# Change to kernel-images directory and build using their system +echo "๐Ÿ“ Changing to kernel-images directory..." +cd kernel-images/images/chromium-headful + +# Make build script executable +chmod +x build-docker.sh + +# Set image name for local use +export IMAGE="kernel-browser:local" +export NAME="kernel-browser-local" + +# Set dummy UKC variables to bypass cloud requirements (we only need Docker) +export UKC_TOKEN="dummy-token-for-local-build" +export UKC_METRO="dummy-metro-for-local-build" + +echo "๐Ÿš€ Starting build with kernel-images build system..." +echo " Image: $IMAGE" +echo " Bypassing UKC requirements for local Docker build..." + +# Run the official build script +./build-docker.sh + +echo "โœ… Build completed successfully!" +echo " Image built: $IMAGE" +echo "" +echo "๐Ÿƒ To run locally, use: ./run-local.sh" \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..42a2744 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,52 @@ +version: '3.8' + +services: + kernel-browser: + image: "kernel-browser:local" + container_name: "kernel-browser-local" + privileged: true + shm_size: 2gb + deploy: + resources: + limits: + memory: 8192M + ports: + # Chrome DevTools Protocol (matches kernel-images default) + - "9222:9222" + # Recording API (matches kernel-images default) + - "444:10001" + # WebRTC client interface + - "8080:8080" + # WebRTC UDP port range for local development + - "56000-56100:56000-56100/udp" + environment: + # Display settings + - DISPLAY_NUM=1 + - HEIGHT=768 + - WIDTH=1024 + # WebRTC settings + - ENABLE_WEBRTC=true + - NEKO_WEBRTC_EPR=56000-56100 + - NEKO_WEBRTC_NAT1TO1=127.0.0.1 + # Run as kernel user (not root) + - RUN_AS_ROOT=false + # Mount Chromium flags + - 
CHROMIUM_FLAGS=--user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --start-maximized --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox + volumes: + # Persist recordings in local directory + - "./recordings:/recordings" + # Mount Chromium flags file (will be created by run script) + - "./kernel-images/images/chromium-headful/.tmp/chromium/flags:/chromium/flags:ro" + tmpfs: + - /dev/shm:size=2g + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 120s # Allow more time for startup + +volumes: + recordings: + driver: local \ No newline at end of file diff --git a/run-local.sh b/run-local.sh new file mode 100755 index 0000000..cfab5c4 --- /dev/null +++ b/run-local.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +# Local run wrapper for kernel-images chromium-headful +set -e -o pipefail + +echo "๐Ÿš€ Starting kernel-browser locally using kernel-images run system..." + +# Ensure we're in the right directory +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +cd "$SCRIPT_DIR" + +# Check if kernel-images submodule exists +if [ ! -d "kernel-images" ] || [ ! 
-f "kernel-images/images/chromium-headful/run-docker.sh" ]; then + echo "โŒ Error: kernel-images submodule not found or incomplete" + echo " Run: git submodule update --init --recursive" + exit 1 +fi + +# Create local recordings directory +mkdir -p "$SCRIPT_DIR/recordings" + +# Change to kernel-images directory +cd kernel-images/images/chromium-headful + +# Make run script executable +chmod +x run-docker.sh + +# Set environment variables for local development +export IMAGE="kernel-browser:local" +export NAME="kernel-browser-local" +export ENABLE_WEBRTC="true" +export RUN_AS_ROOT="false" + +# Set dummy UKC variables to bypass cloud requirements (we only need Docker) +export UKC_TOKEN="dummy-token-for-local-run" +export UKC_METRO="dummy-metro-for-local-run" + +# Local-friendly Chrome flags (less restrictive than cloud) +export CHROMIUM_FLAGS="--user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --start-maximized --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox" + +echo "๐Ÿ”ง Configuration:" +echo " Image: $IMAGE" +echo " Container: $NAME" +echo " WebRTC: $ENABLE_WEBRTC" +echo " Run as root: $RUN_AS_ROOT" +echo " Recordings: $SCRIPT_DIR/recordings" +echo "" + +echo "๐Ÿƒ Starting container with kernel-images run system..." 
+ +# Run using the official run script +./run-docker.sh + +echo "" +echo "๐ŸŒ Service should be accessible at:" +echo " WebRTC Client: http://localhost:8080" +echo " Chrome DevTools: http://localhost:9222" +echo " Recording API: http://localhost:444" \ No newline at end of file From a21844bcad8f170270c5245329fd16f3d3143acd Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Sun, 31 Aug 2025 11:13:52 -0700 Subject: [PATCH 02/25] Neko + Devtools run but separately --- Dockerfile.devtools | 63 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 Dockerfile.devtools diff --git a/Dockerfile.devtools b/Dockerfile.devtools new file mode 100644 index 0000000..edefa5e --- /dev/null +++ b/Dockerfile.devtools @@ -0,0 +1,63 @@ +# DevTools Frontend build stage using browser-operator-core +FROM --platform=linux/amd64 ubuntu:22.04 AS devtools-builder + +# Install required packages for DevTools frontend build +RUN apt-get update && apt-get install -y \ + curl \ + git \ + python3 \ + python3-pip \ + python-is-python3 \ + wget \ + unzip \ + sudo \ + ca-certificates \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Install Node.js 18.x +RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \ + apt-get install -y nodejs && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace + +# Clone depot_tools +RUN git clone https://chromium.googlesource.com/chromium/tools/depot_tools.git +ENV PATH="/workspace/depot_tools:${PATH}" +ENV DEPOT_TOOLS_UPDATE=0 + +# Follow README instructions exactly - fetching code +RUN mkdir devtools +WORKDIR /workspace/devtools +RUN fetch devtools-frontend + +# Build steps +WORKDIR /workspace/devtools/devtools-frontend + +RUN gclient sync +RUN /workspace/depot_tools/ensure_bootstrap + +# Build standard DevTools first +RUN npm run build + +# Add Browser Operator fork and switch to it +RUN git remote add upstream https://github.com/BrowserOperator/browser-operator-core.git +RUN git fetch upstream +RUN 
git checkout upstream/main + +# Build Browser Operator version +RUN npm run build + +# Production stage for DevTools frontend +FROM nginx:alpine AS devtools-frontend +WORKDIR /usr/share/nginx/html + +# Copy the built DevTools frontend from builder +COPY --from=devtools-builder /workspace/devtools/devtools-frontend/out/Default/gen/front_end . + +# Copy nginx config from browser-operator-core +COPY browser-operator-core/docker/nginx.conf /etc/nginx/conf.d/default.conf + +# Create health check endpoint +RUN echo '{"status": "healthy", "service": "browser-operator-devtools"}' > health.json \ No newline at end of file From 50a4a21efae2b7609553bb1a497da867b06d39df Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Sun, 31 Aug 2025 11:55:04 -0700 Subject: [PATCH 03/25] Local setup running with devtools --- Dockerfile.local-extended | 269 ++++++++++++++++++++++++ Makefile | 60 +++++- nginx-devtools.conf | 79 +++++++ run-local-extended.sh | 71 +++++++ supervisor/services/nginx-devtools.conf | 15 ++ 5 files changed, 493 insertions(+), 1 deletion(-) create mode 100644 Dockerfile.local-extended create mode 100644 nginx-devtools.conf create mode 100755 run-local-extended.sh create mode 100644 supervisor/services/nginx-devtools.conf diff --git a/Dockerfile.local-extended b/Dockerfile.local-extended new file mode 100644 index 0000000..0bb0ada --- /dev/null +++ b/Dockerfile.local-extended @@ -0,0 +1,269 @@ +# Extended Dockerfile combining kernel-images with DevTools frontend +# This extends the kernel-images base with Browser Operator DevTools static files + +# DevTools Frontend build stage using browser-operator-core +FROM --platform=linux/amd64 ubuntu:22.04 AS devtools-builder + +# Install required packages for DevTools frontend build +RUN apt-get update && apt-get install -y \ + curl \ + git \ + python3 \ + python3-pip \ + python-is-python3 \ + wget \ + unzip \ + sudo \ + ca-certificates \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Install Node.js 18.x +RUN curl 
-fsSL https://deb.nodesource.com/setup_18.x | bash - && \ + apt-get install -y nodejs && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace + +# Clone depot_tools +RUN git clone https://chromium.googlesource.com/chromium/tools/depot_tools.git +ENV PATH="/workspace/depot_tools:${PATH}" +ENV DEPOT_TOOLS_UPDATE=0 + +# Follow README instructions exactly - fetching code +RUN mkdir devtools +WORKDIR /workspace/devtools +RUN fetch devtools-frontend + +# Build steps +WORKDIR /workspace/devtools/devtools-frontend + +RUN gclient sync +RUN /workspace/depot_tools/ensure_bootstrap + +# Build standard DevTools first +RUN npm run build + +# Add Browser Operator fork and switch to it +RUN git remote add upstream https://github.com/BrowserOperator/browser-operator-core.git +RUN git fetch upstream +RUN git checkout upstream/main + +# Build Browser Operator version +RUN npm run build + +# ============================================================================ +# Use kernel-images base with DevTools integration +# ============================================================================ +FROM docker.io/golang:1.25.0 AS server-builder +WORKDIR /workspace/server + +ARG TARGETOS +ARG TARGETARCH +ENV CGO_ENABLED=0 + +COPY kernel-images/server/go.mod ./ +COPY kernel-images/server/go.sum ./ +RUN go mod download + +COPY kernel-images/server/ . +RUN GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \ + go build -ldflags="-s -w" -o /out/kernel-images-api ./cmd/api + +# webrtc client +FROM node:22-bullseye-slim AS client +WORKDIR /src +COPY kernel-images/images/chromium-headful/client/package*.json ./ +RUN npm install +COPY kernel-images/images/chromium-headful/client/ . 
+RUN npm run build + +# xorg dependencies +FROM docker.io/ubuntu:22.04 AS xorg-deps +WORKDIR /xorg +ENV DEBIAN_FRONTEND=noninteractive +RUN set -eux; \ + apt-get update; \ + apt-get install -y \ + git gcc pkgconf autoconf automake libtool make xorg-dev xutils-dev \ + && rm -rf /var/lib/apt/lists/*; +COPY kernel-images/images/chromium-headful/xorg-deps/ /xorg/ +# build xf86-video-dummy v0.3.8 with RandR support +RUN set -eux; \ + cd xf86-video-dummy/v0.3.8; \ + patch -p1 < ../01_v0.3.8_xdummy-randr.patch; \ + autoreconf -v --install; \ + ./configure; \ + make -j$(nproc); \ + make install; +# build custom input driver +RUN set -eux; \ + cd xf86-input-neko; \ + ./autogen.sh --prefix=/usr; \ + ./configure; \ + make -j$(nproc); \ + make install; + +FROM ghcr.io/onkernel/neko/base:3.0.6-v1.0.1 AS neko +# ^--- now has event.SYSTEM_PONG with legacy support to keepalive + +# Final stage: kernel-images base + DevTools static files +FROM docker.io/ubuntu:22.04 + +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBIAN_PRIORITY=high + +RUN apt-get update && \ + apt-get -y upgrade && \ + apt-get -y install \ + # UI Requirements + xvfb \ + xterm \ + xdotool \ + scrot \ + imagemagick \ + sudo \ + mutter \ + # Python/pyenv reqs + build-essential \ + libssl-dev \ + zlib1g-dev \ + libbz2-dev \ + libreadline-dev \ + libsqlite3-dev \ + curl \ + git \ + libncursesw5-dev \ + xz-utils \ + tk-dev \ + libxml2-dev \ + libxmlsec1-dev \ + libffi-dev \ + liblzma-dev \ + # Network tools + net-tools \ + netcat \ + # PPA req + software-properties-common \ + # Add nginx for DevTools serving + nginx && \ + # Userland apps + sudo add-apt-repository ppa:mozillateam/ppa && \ + sudo apt-get install -y --no-install-recommends \ + chromium-browser \ + libreoffice \ + x11-apps \ + xpdf \ + gedit \ + xpaint \ + tint2 \ + galculator \ + pcmanfm \ + wget \ + xdg-utils \ + libvulkan1 \ + fonts-liberation \ + unzip && \ + apt-get clean + +# install ffmpeg manually since the version available in apt is from the 4.x 
branch due to #drama. +# as of writing these static builds will be the latest 7.0.x release. +RUN set -eux; \ + URL="https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz"; \ + echo "Downloading FFmpeg static build from $URL"; \ + curl -fsSL "$URL" -o /tmp/ffmpeg.tar.xz; \ + tar -xJf /tmp/ffmpeg.tar.xz -C /tmp; \ + install -m755 /tmp/ffmpeg-*/ffmpeg /usr/local/bin/ffmpeg; \ + install -m755 /tmp/ffmpeg-*/ffprobe /usr/local/bin/ffprobe; \ + rm -rf /tmp/ffmpeg* + +# runtime +ENV USERNAME=root +RUN set -eux; \ + apt-get update; \ + apt-get install -y --no-install-recommends \ + wget ca-certificates python2 supervisor xclip xdotool \ + pulseaudio dbus-x11 xserver-xorg-video-dummy \ + libcairo2 libxcb1 libxrandr2 libxv1 libopus0 libvpx7 \ + gstreamer1.0-plugins-base gstreamer1.0-plugins-good \ + gstreamer1.0-plugins-bad gstreamer1.0-plugins-ugly \ + gstreamer1.0-pulseaudio gstreamer1.0-omx; \ + # + # install libxcvt0 (not available in debian:bullseye) + ARCH=$(dpkg --print-architecture); \ + wget http://ftp.de.debian.org/debian/pool/main/libx/libxcvt/libxcvt0_0.1.2-1_${ARCH}.deb; \ + apt-get install --no-install-recommends ./libxcvt0_0.1.2-1_${ARCH}.deb; \ + rm ./libxcvt0_0.1.2-1_${ARCH}.deb; \ + # + # workaround for an X11 problem: http://blog.tigerteufel.de/?p=476 + mkdir /tmp/.X11-unix; \ + chmod 1777 /tmp/.X11-unix; \ + chown $USERNAME /tmp/.X11-unix/; \ + # + # make directories for neko + mkdir -p /etc/neko /var/www /var/log/neko \ + /tmp/runtime-$USERNAME \ + /home/$USERNAME/.config/pulse \ + /home/$USERNAME/.local/share/xorg; \ + chmod 1777 /var/log/neko; \ + chown $USERNAME /var/log/neko/ /tmp/runtime-$USERNAME; \ + chown -R $USERNAME:$USERNAME /home/$USERNAME; \ + # clean up + apt-get clean -y; \ + rm -rf /var/lib/apt/lists/* /var/cache/apt/ + +# install chromium and sqlite3 for debugging the cookies file +RUN add-apt-repository -y ppa:xtradeb/apps +RUN apt update -y && apt install -y chromium sqlite3 + +# setup desktop env & app +ENV 
DISPLAY_NUM=1 +ENV HEIGHT=768 +ENV WIDTH=1024 +ENV WITHDOCKER=true + +# Copy kernel-images configuration and binaries +COPY kernel-images/images/chromium-headful/xorg.conf /etc/neko/xorg.conf +COPY kernel-images/images/chromium-headful/neko.yaml /etc/neko/neko.yaml +COPY --from=neko /usr/bin/neko /usr/bin/neko +COPY --from=client /src/dist/ /var/www +COPY --from=xorg-deps /usr/local/lib/xorg/modules/drivers/dummy_drv.so /usr/lib/xorg/modules/drivers/dummy_drv.so +COPY --from=xorg-deps /usr/local/lib/xorg/modules/input/neko_drv.so /usr/lib/xorg/modules/input/neko_drv.so + +COPY kernel-images/images/chromium-headful/image-chromium/ / +COPY kernel-images/images/chromium-headful/start-chromium.sh /images/chromium-headful/start-chromium.sh +RUN chmod +x /images/chromium-headful/start-chromium.sh +COPY kernel-images/images/chromium-headful/wrapper.sh /wrapper.sh +COPY kernel-images/images/chromium-headful/supervisord.conf /etc/supervisor/supervisord.conf +COPY kernel-images/images/chromium-headful/supervisor/services/ /etc/supervisor/conf.d/services/ + +# copy the kernel-images API binary built in the builder stage +COPY --from=server-builder /out/kernel-images-api /usr/local/bin/kernel-images-api + +# ============================================================================ +# DevTools Integration +# ============================================================================ + +# Copy DevTools static files from builder +COPY --from=devtools-builder /workspace/devtools/devtools-frontend/out/Default/gen/front_end /usr/share/nginx/devtools + +# Create DevTools nginx configuration +COPY nginx-devtools.conf /etc/nginx/sites-available/devtools +RUN ln -s /etc/nginx/sites-available/devtools /etc/nginx/sites-enabled/devtools && \ + rm /etc/nginx/sites-enabled/default + +# Add DevTools nginx service to supervisor +COPY supervisor/services/nginx-devtools.conf /etc/supervisor/conf.d/services/nginx-devtools.conf + +# Create nginx temp directories and set permissions +RUN mkdir 
-p /var/lib/nginx/body \ + /var/lib/nginx/proxy \ + /var/lib/nginx/fastcgi \ + /var/lib/nginx/uwsgi \ + /var/lib/nginx/scgi && \ + chown -R www-data:www-data /var/lib/nginx && \ + chown -R www-data:www-data /usr/share/nginx/devtools + +RUN useradd -m -s /bin/bash kernel + +ENTRYPOINT [ "/wrapper.sh" ] \ No newline at end of file diff --git a/Makefile b/Makefile index 9fdfcde..e0415bc 100644 --- a/Makefile +++ b/Makefile @@ -103,4 +103,62 @@ native-run: ## Run using kernel-images native script directly UKC_TOKEN=dummy-token UKC_METRO=dummy-metro IMAGE=kernel-browser:local NAME=kernel-browser-local ENABLE_WEBRTC=true ./run-docker.sh # Quick development workflow -quick: init build compose-up test ## Quick setup: init + build + run + test \ No newline at end of file +quick: init build compose-up test ## Quick setup: init + build + run + test + +# ============================================================================ +# Extended targets with DevTools frontend +# ============================================================================ + +build-extended: init ## Build extended image with DevTools frontend + @echo "๐Ÿ”จ Building extended kernel-browser with DevTools frontend..." + docker build -f Dockerfile.local-extended -t kernel-browser:extended . + @echo "โœ… Extended build complete" + +run-extended: ## Run extended container with DevTools (interactive) + @echo "๐Ÿš€ Starting extended kernel-browser with DevTools..." + ./run-local-extended.sh + +info-extended: ## Show extended connection information + @echo "" + @echo "๐ŸŒ Extended Service Access Points:" + @echo " WebRTC Client: http://localhost:8080" + @echo " Chrome DevTools: http://localhost:9222/json" + @echo " Recording API: http://localhost:444/api" + @echo " Enhanced DevTools UI: http://localhost:8001" + @echo " DevTools Health: http://localhost:8001/health" + +test-extended: ## Test extended service endpoints including DevTools + @echo "๐Ÿงช Testing extended service endpoints..." 
+ @echo -n "WebRTC Client (8080): " + @curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/ || echo "Failed to connect" + @echo "" + @echo -n "Chrome DevTools (9222): " + @curl -s -o /dev/null -w "%{http_code}" http://localhost:9222/json/version || echo "Failed to connect" + @echo "" + @echo -n "Recording API (444): " + @curl -s -o /dev/null -w "%{http_code}" http://localhost:444/ && echo " (404 is normal - API is running)" || echo "Failed to connect" + @echo "" + @echo -n "DevTools UI (8001): " + @curl -s -o /dev/null -w "%{http_code}" http://localhost:8001/ || echo "Failed to connect" + @echo "" + @echo -n "DevTools Health (8001): " + @curl -s -o /dev/null -w "%{http_code}" http://localhost:8001/health || echo "Failed to connect" + @echo "" + @echo "๐ŸŽฏ All extended services are ready! Access points:" + @echo " WebRTC Client: http://localhost:8080" + @echo " Chrome DevTools: http://localhost:9222/json" + @echo " Enhanced DevTools UI: http://localhost:8001" + +stop-extended: ## Stop extended container + @echo "๐Ÿ›‘ Stopping extended containers..." + docker stop kernel-browser-extended 2>/dev/null || true + docker rm kernel-browser-extended 2>/dev/null || true + @echo "โœ… Extended containers stopped" + +clean-extended: stop-extended ## Clean up extended containers and images + @echo "๐Ÿงน Cleaning up extended resources..." 
+ docker rmi kernel-browser:extended 2>/dev/null || true + @echo "โœ… Extended cleanup complete" + +# Extended workflow +quick-extended: init build-extended run-extended ## Quick extended setup: init + build + run with DevTools \ No newline at end of file diff --git a/nginx-devtools.conf b/nginx-devtools.conf new file mode 100644 index 0000000..42db1a1 --- /dev/null +++ b/nginx-devtools.conf @@ -0,0 +1,79 @@ +server { + listen 8001; + listen [::]:8001; + server_name localhost; + + # Root directory for DevTools frontend + root /usr/share/nginx/devtools; + index inspector.html devtools_app.html index.html; + + # Compression + gzip on; + gzip_vary on; + gzip_min_length 1024; + gzip_types text/plain text/css text/xml text/javascript application/javascript application/xml+rss application/json application/wasm; + + # Security headers + add_header X-Frame-Options "SAMEORIGIN" always; + add_header X-Content-Type-Options "nosniff" always; + add_header X-XSS-Protection "1; mode=block" always; + + # CORS headers for DevTools + add_header Access-Control-Allow-Origin "*" always; + add_header Access-Control-Allow-Methods "GET, POST, OPTIONS" always; + add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range" always; + + # Handle OPTIONS requests + if ($request_method = 'OPTIONS') { + return 204; + } + + # Cache control for static assets + location ~* \.(jpg|jpeg|png|gif|ico|css|js|svg|woff|woff2|ttf|eot|avif)$ { + expires 1d; + add_header Cache-Control "public, immutable"; + } + + # Specific handling for WebAssembly files + location ~ \.wasm$ { + add_header Content-Type application/wasm; + } + + # JSON files + location ~ \.json$ { + add_header Content-Type application/json; + } + + # Main location + location / { + try_files $uri $uri/ /index.html; + } + + # Specific paths for DevTools + location /front_end/ { + alias /usr/share/nginx/devtools/; + try_files $uri $uri/ =404; + } + + # Health check for DevTools 
service + location /health { + access_log off; + add_header Content-Type application/json; + return 200 '{"status": "healthy", "service": "devtools-frontend"}'; + } + + # Error pages + error_page 404 /404.html; + location = /404.html { + internal; + } + + error_page 500 502 503 504 /50x.html; + location = /50x.html { + internal; + } + + # Logging + access_log /var/log/nginx/devtools-access.log; + error_log /var/log/nginx/devtools-error.log warn; +} \ No newline at end of file diff --git a/run-local-extended.sh b/run-local-extended.sh new file mode 100755 index 0000000..dc4cbe0 --- /dev/null +++ b/run-local-extended.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +# Extended local run wrapper for kernel-images chromium-headful + DevTools +set -e -o pipefail + +echo "๐Ÿš€ Starting kernel-browser (EXTENDED) locally using kernel-images run system..." + +# Ensure we're in the right directory +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +cd "$SCRIPT_DIR" + +# Check if kernel-images submodule exists +if [ ! -d "kernel-images" ] || [ ! 
-f "kernel-images/images/chromium-headful/run-docker.sh" ]; then + echo "โŒ Error: kernel-images submodule not found or incomplete" + echo " Run: git submodule update --init --recursive" + exit 1 +fi + +# Create local recordings directory +mkdir -p "$SCRIPT_DIR/recordings" + +# Change to kernel-images directory +cd kernel-images/images/chromium-headful + +# Make run script executable +chmod +x run-docker.sh + +# Set environment variables for extended local development +export IMAGE="kernel-browser:extended" +export NAME="kernel-browser-extended" +export ENABLE_WEBRTC="true" +export RUN_AS_ROOT="false" + +# Set dummy UKC variables to bypass cloud requirements (we only need Docker) +export UKC_TOKEN="dummy-token-for-local-run" +export UKC_METRO="dummy-metro-for-local-run" + +# Local-friendly Chrome flags (less restrictive than cloud) + custom DevTools frontend +export CHROMIUM_FLAGS="--user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --start-maximized --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --custom-devtools-frontend=http://localhost:8001/" + +echo "๐Ÿ”ง Configuration:" +echo " Image: $IMAGE" +echo " Container: $NAME" +echo " WebRTC: $ENABLE_WEBRTC" +echo " DevTools UI: enabled" +echo " Run as root: $RUN_AS_ROOT" +echo " Recordings: $SCRIPT_DIR/recordings" +echo "" + +echo "๐Ÿƒ Starting extended container with kernel-images run system..." + +# Backup original run-docker.sh to modify port mappings +if [ ! 
-f run-docker.sh.original ]; then + cp run-docker.sh run-docker.sh.original +fi + +# Create modified run script that adds DevTools port mapping +cat run-docker.sh.original | \ +sed 's/docker run -it/docker run -it -p 8001:8001/' > run-docker.sh.extended + +chmod +x run-docker.sh.extended + +# Run using the modified run script with DevTools port +./run-docker.sh.extended + +echo "" +echo "๐ŸŒ Extended service should be accessible at:" +echo " WebRTC Client: http://localhost:8080" +echo " Chrome DevTools: http://localhost:9222" +echo " Recording API: http://localhost:444" +echo " Enhanced DevTools UI: http://localhost:8001" \ No newline at end of file diff --git a/supervisor/services/nginx-devtools.conf b/supervisor/services/nginx-devtools.conf new file mode 100644 index 0000000..442cf08 --- /dev/null +++ b/supervisor/services/nginx-devtools.conf @@ -0,0 +1,15 @@ +[program:nginx-devtools] +command=nginx -g 'daemon off;' +autostart=true +autorestart=true +startretries=3 +user=root +stdout_logfile=/var/log/nginx-devtools-stdout.log +stderr_logfile=/var/log/nginx-devtools-stderr.log +stdout_logfile_maxbytes=10MB +stderr_logfile_maxbytes=10MB +stdout_logfile_backups=3 +stderr_logfile_backups=3 +redirect_stderr=false +killasgroup=true +stopasgroup=true \ No newline at end of file From 0ad4e72cb6bfdd08cf089da8e2f40ca1b9d0fd9c Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Tue, 2 Sep 2025 15:15:59 -0500 Subject: [PATCH 04/25] Custom Devtools working on CloudRun --- Dockerfile.cloudrun | 87 ++++++++++++++++++-- cloudbuild.yaml | 18 ++--- cloudrun-wrapper.sh | 148 ++++++++++++++++++++++++++++++++--- nginx-devtools-cloudrun.conf | 105 +++++++++++++++++++++++++ nginx.conf | 16 +++- service.yaml | 14 ++-- 6 files changed, 355 insertions(+), 33 deletions(-) create mode 100644 nginx-devtools-cloudrun.conf diff --git a/Dockerfile.cloudrun b/Dockerfile.cloudrun index 8c3e812..78776be 100644 --- a/Dockerfile.cloudrun +++ b/Dockerfile.cloudrun @@ -1,5 +1,56 @@ +# DevTools Frontend 
build stage using browser-operator-core +FROM --platform=linux/amd64 ubuntu:22.04 AS devtools-builder + +# Install required packages for DevTools frontend build +RUN apt-get update && apt-get install -y \ + curl \ + git \ + python3 \ + python3-pip \ + python-is-python3 \ + wget \ + unzip \ + sudo \ + ca-certificates \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Install Node.js 18.x +RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \ + apt-get install -y nodejs && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace + +# Clone depot_tools +RUN git clone https://chromium.googlesource.com/chromium/tools/depot_tools.git +ENV PATH="/workspace/depot_tools:${PATH}" +ENV DEPOT_TOOLS_UPDATE=0 + +# Follow README instructions exactly - fetching code +RUN mkdir devtools +WORKDIR /workspace/devtools +RUN fetch devtools-frontend + +# Build steps +WORKDIR /workspace/devtools/devtools-frontend + +RUN gclient sync +RUN /workspace/depot_tools/ensure_bootstrap + +# Build standard DevTools first +RUN npm run build + +# Add Browser Operator fork and switch to it +RUN git remote add upstream https://github.com/BrowserOperator/browser-operator-core.git +RUN git fetch upstream +RUN git checkout upstream/main + +# Build Browser Operator version +RUN npm run build + # Multi-stage build using kernel-images as base -FROM docker.io/golang:1.25.0 AS server-builder +FROM docker.io/golang:1.23.0 AS server-builder WORKDIR /workspace/server ARG TARGETOS @@ -90,6 +141,12 @@ RUN apt-get update && \ nginx \ # PPA req software-properties-common && \ + # Disable nginx auto-start to prevent conflicts with custom config + systemctl disable nginx || true && \ + systemctl mask nginx || true && \ + # Remove default nginx config to prevent conflicts + rm -f /etc/nginx/sites-enabled/default && \ + rm -f /etc/nginx/nginx.conf && \ # Userland apps sudo add-apt-repository ppa:mozillateam/ppa && \ sudo apt-get install -y --no-install-recommends \ @@ -186,19 +243,39 @@ COPY 
kernel-images/images/chromium-headful/supervisor/services/ /etc/supervisor/ # Copy the kernel-images API binary COPY --from=server-builder /out/kernel-images-api /usr/local/bin/kernel-images-api -# Cloud Run specific: nginx configuration for port proxying -COPY nginx.conf /etc/nginx/nginx.conf +# ============================================================================ +# DevTools Integration +# ============================================================================ + +# Copy DevTools static files from builder +COPY --from=devtools-builder /workspace/devtools/devtools-frontend/out/Default/gen/front_end /usr/share/nginx/devtools + +# Set permissions for DevTools files +RUN chown -R kernel:kernel /usr/share/nginx/devtools + +# Cloud Run specific: wrapper script only (nginx config is inline) +# DO NOT copy nginx.conf to avoid auto-start conflicts COPY cloudrun-wrapper.sh /cloudrun-wrapper.sh RUN chmod +x /cloudrun-wrapper.sh +# Add essential services for neko WebRTC and Chromium +COPY supervisor/services-cloudrun/dbus.conf /etc/supervisor/conf.d/services-cloudrun/dbus.conf +COPY supervisor/services-cloudrun/xorg.conf /etc/supervisor/conf.d/services-cloudrun/xorg.conf +COPY supervisor/services-cloudrun/neko.conf /etc/supervisor/conf.d/services-cloudrun/neko.conf +COPY supervisor/services-cloudrun/chromium.conf /etc/supervisor/conf.d/services-cloudrun/chromium.conf +COPY supervisor/services-cloudrun/devtools-frontend.conf /etc/supervisor/conf.d/services-cloudrun/devtools-frontend.conf + # Create nginx temp directories for non-root execution RUN mkdir -p /tmp/nginx_client_temp /tmp/nginx_proxy_temp /tmp/nginx_fastcgi_temp \ - /tmp/nginx_uwsgi_temp /tmp/nginx_scgi_temp && \ + /tmp/nginx_uwsgi_temp /tmp/nginx_scgi_temp \ + /tmp/nginx_devtools_client_temp /tmp/nginx_devtools_proxy_temp /tmp/nginx_devtools_fastcgi_temp \ + /tmp/nginx_devtools_uwsgi_temp /tmp/nginx_devtools_scgi_temp && \ chown -R kernel:kernel /tmp/nginx_* # Create supervisor log directories RUN 
mkdir -p /var/log/supervisord/chromium /var/log/supervisord/neko /var/log/supervisord/xorg \ - /var/log/supervisord/dbus /var/log/supervisord/kernel-images-api /var/log/supervisord/mutter && \ + /var/log/supervisord/dbus /var/log/supervisord/kernel-images-api /var/log/supervisord/mutter \ + /var/log/supervisord/nginx /var/log/supervisord/devtools-frontend && \ chown -R kernel:kernel /var/log/supervisord # Create health check endpoint diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 37dac28..3470f89 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -18,26 +18,26 @@ steps: - '-c' - | echo "Attempting to pull previous image for caching..." - docker pull gcr.io/$PROJECT_ID/kernel-browser:latest || echo "No previous image found for caching" + docker pull us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest || echo "No previous image found for caching" - # Step 3: Build the Docker image with caching (using kernel-cloud Dockerfile) + # Step 3: Build the Docker image with caching (using cloudrun Dockerfile) - name: 'gcr.io/cloud-builders/docker' args: - 'build' - '--file' - - 'Dockerfile.kernel-cloud' + - 'Dockerfile.cloudrun' - '--cache-from' - - 'gcr.io/$PROJECT_ID/kernel-browser:latest' + - 'us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest' - '--tag' - - 'gcr.io/$PROJECT_ID/kernel-browser:latest' + - 'us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest' - '.' 
timeout: '3600s' # Allow 1 hour for build (it's a large image) - # Step 4: Push the image to Google Container Registry + # Step 4: Push the image to Artifact Registry - name: 'gcr.io/cloud-builders/docker' args: - 'push' - - 'gcr.io/$PROJECT_ID/kernel-browser:latest' + - 'us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest' # Step 5: Update the service.yaml with the correct project ID - name: 'gcr.io/cloud-builders/gcloud' @@ -89,9 +89,9 @@ options: # Allocate disk space for the large build diskSizeGb: 100 -# Images to be pushed to Container Registry +# Images to be pushed to Artifact Registry images: - - 'gcr.io/$PROJECT_ID/kernel-browser:latest' + - 'us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest' # Tags for organization diff --git a/cloudrun-wrapper.sh b/cloudrun-wrapper.sh index a62fdf6..8e91d01 100644 --- a/cloudrun-wrapper.sh +++ b/cloudrun-wrapper.sh @@ -11,23 +11,23 @@ export ENABLE_WEBRTC=true export DISPLAY_NUM=1 export HEIGHT=768 export WIDTH=1024 +export NEKO_BIND=:8081 # Port configuration for Cloud Run export PORT=${PORT:-8080} -export CHROMIUM_FLAGS="${CHROMIUM_FLAGS:---user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor}" +export CHROMIUM_FLAGS="${CHROMIUM_FLAGS:---user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor --custom-devtools-frontend=http://localhost:8001/ https://www.google.com}" # Setup directories with proper permissions mkdir -p /tmp/nginx_client_temp /tmp/nginx_proxy_temp /tmp/nginx_fastcgi_temp \ /tmp/nginx_uwsgi_temp /tmp/nginx_scgi_temp \ + /tmp/nginx_devtools_client_temp /tmp/nginx_devtools_proxy_temp /tmp/nginx_devtools_fastcgi_temp \ + /tmp/nginx_devtools_uwsgi_temp 
/tmp/nginx_devtools_scgi_temp \ /home/kernel/user-data /home/kernel/.config /home/kernel/.cache \ - /tmp/runtime-kernel /var/log/neko /tmp/recordings + /tmp/runtime-kernel /var/log/neko /tmp/recordings \ + /tmp/supervisord /tmp/dbus -# Test nginx configuration -echo "[cloudrun-wrapper] Testing nginx configuration..." -if ! nginx -t; then - echo "[cloudrun-wrapper] ERROR: nginx configuration test failed" - exit 1 -fi +# Skip nginx test - supervisor will handle nginx startup +echo "[cloudrun-wrapper] Skipping nginx test - supervisor manages nginx" # Start supervisor for kernel-images services in background echo "[cloudrun-wrapper] Starting kernel-images services..." @@ -45,6 +45,134 @@ cleanup() { } trap cleanup TERM INT -# Start nginx in foreground (main process for Cloud Run) +# Start nginx proxy on Cloud Run port (proxies directly to services) echo "[cloudrun-wrapper] Starting nginx proxy on port $PORT" -nginx -g "daemon off;" \ No newline at end of file + +# Create nginx config file +cat > /tmp/nginx.conf < Date: Thu, 4 Sep 2025 09:47:52 -0500 Subject: [PATCH 05/25] Local setup --- Dockerfile.local-extended => Dockerfile.local | 0 Makefile | 97 +++++-------------- build-local.sh | 34 ++----- run-local-extended.sh | 71 -------------- run-local.sh | 41 +++++--- 5 files changed, 61 insertions(+), 182 deletions(-) rename Dockerfile.local-extended => Dockerfile.local (100%) delete mode 100755 run-local-extended.sh diff --git a/Dockerfile.local-extended b/Dockerfile.local similarity index 100% rename from Dockerfile.local-extended rename to Dockerfile.local diff --git a/Makefile b/Makefile index e0415bc..24c7cdc 100644 --- a/Makefile +++ b/Makefile @@ -15,13 +15,13 @@ init: ## Initialize submodules (run this first) git submodule update --init --recursive @echo "โœ… Submodules initialized" -build: init ## Build using kernel-images build system - @echo "๐Ÿ”จ Building with kernel-images native build system..." 
- ./build-local.sh - @echo "โœ… Build complete" +build: init ## Build extended image with DevTools frontend + @echo "๐Ÿ”จ Building extended kernel-browser with DevTools frontend..." + docker build -f Dockerfile.local -t kernel-browser:extended . + @echo "โœ… Extended build complete" -run: ## Run using kernel-images run system (interactive) - @echo "๐Ÿš€ Starting kernel-browser using native run script..." +run: ## Run extended container with DevTools (interactive) + @echo "๐Ÿš€ Starting extended kernel-browser with DevTools..." ./run-local.sh compose-up: build ## Start with docker-compose (background) @@ -40,8 +40,8 @@ dev: compose-dev ## Alias for compose-dev stop: ## Stop all containers @echo "๐Ÿ›‘ Stopping containers..." docker-compose down - docker stop kernel-browser-local 2>/dev/null || true - docker rm kernel-browser-local 2>/dev/null || true + docker stop kernel-browser-extended 2>/dev/null || true + docker rm kernel-browser-extended 2>/dev/null || true @echo "โœ… Containers stopped" restart: ## Restart containers @@ -59,15 +59,16 @@ status: ## Show container status @docker ps --filter name=kernel-browser shell: ## Get shell access to running container - docker exec -it kernel-browser-local bash || docker-compose exec kernel-browser bash + docker exec -it kernel-browser-extended bash || docker-compose exec kernel-browser bash info: ## Show connection information @echo "" @echo "๐ŸŒ Service Access Points:" - @echo " WebRTC Client: http://localhost:8080" - @echo " Chrome DevTools: http://localhost:9222/json" - @echo " Recording API: http://localhost:444/api" - @echo " Health Check: http://localhost:8080/" + @echo " WebRTC Client: http://localhost:8080" + @echo " Chrome DevTools: http://localhost:9222/json" + @echo " Recording API: http://localhost:444/api" + @echo " Enhanced DevTools UI: http://localhost:8001" + @echo " DevTools Health: http://localhost:8001/health" test: ## Test service endpoints @echo "๐Ÿงช Testing service endpoints..." 
@@ -80,14 +81,21 @@ test: ## Test service endpoints @echo -n "Recording API (444): " @curl -s -o /dev/null -w "%{http_code}" http://localhost:444/ && echo " (404 is normal - API is running)" || echo "Failed to connect" @echo "" + @echo -n "DevTools UI (8001): " + @curl -s -o /dev/null -w "%{http_code}" http://localhost:8001/ || echo "Failed to connect" + @echo "" + @echo -n "DevTools Health (8001): " + @curl -s -o /dev/null -w "%{http_code}" http://localhost:8001/health || echo "Failed to connect" + @echo "" @echo "๐ŸŽฏ All services are ready! Access points:" - @echo " WebRTC Client: http://localhost:8080" - @echo " Chrome DevTools: http://localhost:9222/json" + @echo " WebRTC Client: http://localhost:8080" + @echo " Chrome DevTools: http://localhost:9222/json" + @echo " Enhanced DevTools UI: http://localhost:8001" clean: stop ## Clean up everything @echo "๐Ÿงน Cleaning up..." docker-compose down -v 2>/dev/null || true - docker rmi kernel-browser:local 2>/dev/null || true + docker rmi kernel-browser:extended 2>/dev/null || true docker system prune -f rm -rf recordings/* 2>/dev/null || true rm -rf kernel-images/images/chromium-headful/.tmp 2>/dev/null || true @@ -105,60 +113,3 @@ native-run: ## Run using kernel-images native script directly # Quick development workflow quick: init build compose-up test ## Quick setup: init + build + run + test -# ============================================================================ -# Extended targets with DevTools frontend -# ============================================================================ - -build-extended: init ## Build extended image with DevTools frontend - @echo "๐Ÿ”จ Building extended kernel-browser with DevTools frontend..." - docker build -f Dockerfile.local-extended -t kernel-browser:extended . - @echo "โœ… Extended build complete" - -run-extended: ## Run extended container with DevTools (interactive) - @echo "๐Ÿš€ Starting extended kernel-browser with DevTools..." 
- ./run-local-extended.sh - -info-extended: ## Show extended connection information - @echo "" - @echo "๐ŸŒ Extended Service Access Points:" - @echo " WebRTC Client: http://localhost:8080" - @echo " Chrome DevTools: http://localhost:9222/json" - @echo " Recording API: http://localhost:444/api" - @echo " Enhanced DevTools UI: http://localhost:8001" - @echo " DevTools Health: http://localhost:8001/health" - -test-extended: ## Test extended service endpoints including DevTools - @echo "๐Ÿงช Testing extended service endpoints..." - @echo -n "WebRTC Client (8080): " - @curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/ || echo "Failed to connect" - @echo "" - @echo -n "Chrome DevTools (9222): " - @curl -s -o /dev/null -w "%{http_code}" http://localhost:9222/json/version || echo "Failed to connect" - @echo "" - @echo -n "Recording API (444): " - @curl -s -o /dev/null -w "%{http_code}" http://localhost:444/ && echo " (404 is normal - API is running)" || echo "Failed to connect" - @echo "" - @echo -n "DevTools UI (8001): " - @curl -s -o /dev/null -w "%{http_code}" http://localhost:8001/ || echo "Failed to connect" - @echo "" - @echo -n "DevTools Health (8001): " - @curl -s -o /dev/null -w "%{http_code}" http://localhost:8001/health || echo "Failed to connect" - @echo "" - @echo "๐ŸŽฏ All extended services are ready! Access points:" - @echo " WebRTC Client: http://localhost:8080" - @echo " Chrome DevTools: http://localhost:9222/json" - @echo " Enhanced DevTools UI: http://localhost:8001" - -stop-extended: ## Stop extended container - @echo "๐Ÿ›‘ Stopping extended containers..." - docker stop kernel-browser-extended 2>/dev/null || true - docker rm kernel-browser-extended 2>/dev/null || true - @echo "โœ… Extended containers stopped" - -clean-extended: stop-extended ## Clean up extended containers and images - @echo "๐Ÿงน Cleaning up extended resources..." 
- docker rmi kernel-browser:extended 2>/dev/null || true - @echo "โœ… Extended cleanup complete" - -# Extended workflow -quick-extended: init build-extended run-extended ## Quick extended setup: init + build + run with DevTools \ No newline at end of file diff --git a/build-local.sh b/build-local.sh index 975196e..b388617 100755 --- a/build-local.sh +++ b/build-local.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash -# Local build wrapper for kernel-images chromium-headful +# Extended local build wrapper for kernel-browser with DevTools set -e -o pipefail -echo "๐Ÿ”จ Building kernel-browser using kernel-images build system..." +echo "๐Ÿ”จ Building extended kernel-browser with DevTools frontend..." # Ensure we're in the right directory SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) @@ -22,29 +22,15 @@ if [ ! -f "kernel-images/images/chromium-headful/build-docker.sh" ]; then exit 1 fi -# Change to kernel-images directory and build using their system -echo "๐Ÿ“ Changing to kernel-images directory..." -cd kernel-images/images/chromium-headful +echo "๐Ÿš€ Starting extended build with Docker..." +echo " Using: Dockerfile.local" +echo " Target image: kernel-browser:extended" -# Make build script executable -chmod +x build-docker.sh +# Build using Docker with extended Dockerfile +docker build -f Dockerfile.local -t kernel-browser:extended . -# Set image name for local use -export IMAGE="kernel-browser:local" -export NAME="kernel-browser-local" - -# Set dummy UKC variables to bypass cloud requirements (we only need Docker) -export UKC_TOKEN="dummy-token-for-local-build" -export UKC_METRO="dummy-metro-for-local-build" - -echo "๐Ÿš€ Starting build with kernel-images build system..." -echo " Image: $IMAGE" -echo " Bypassing UKC requirements for local Docker build..." - -# Run the official build script -./build-docker.sh - -echo "โœ… Build completed successfully!" -echo " Image built: $IMAGE" +echo "โœ… Extended build completed successfully!" 
+echo " Image built: kernel-browser:extended" +echo " Includes: Chromium + DevTools frontend + WebRTC" echo "" echo "๐Ÿƒ To run locally, use: ./run-local.sh" \ No newline at end of file diff --git a/run-local-extended.sh b/run-local-extended.sh deleted file mode 100755 index dc4cbe0..0000000 --- a/run-local-extended.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env bash - -# Extended local run wrapper for kernel-images chromium-headful + DevTools -set -e -o pipefail - -echo "๐Ÿš€ Starting kernel-browser (EXTENDED) locally using kernel-images run system..." - -# Ensure we're in the right directory -SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) -cd "$SCRIPT_DIR" - -# Check if kernel-images submodule exists -if [ ! -d "kernel-images" ] || [ ! -f "kernel-images/images/chromium-headful/run-docker.sh" ]; then - echo "โŒ Error: kernel-images submodule not found or incomplete" - echo " Run: git submodule update --init --recursive" - exit 1 -fi - -# Create local recordings directory -mkdir -p "$SCRIPT_DIR/recordings" - -# Change to kernel-images directory -cd kernel-images/images/chromium-headful - -# Make run script executable -chmod +x run-docker.sh - -# Set environment variables for extended local development -export IMAGE="kernel-browser:extended" -export NAME="kernel-browser-extended" -export ENABLE_WEBRTC="true" -export RUN_AS_ROOT="false" - -# Set dummy UKC variables to bypass cloud requirements (we only need Docker) -export UKC_TOKEN="dummy-token-for-local-run" -export UKC_METRO="dummy-metro-for-local-run" - -# Local-friendly Chrome flags (less restrictive than cloud) + custom DevTools frontend -export CHROMIUM_FLAGS="--user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --start-maximized --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --custom-devtools-frontend=http://localhost:8001/" - -echo "๐Ÿ”ง Configuration:" -echo " Image: $IMAGE" -echo " Container: $NAME" -echo " WebRTC: $ENABLE_WEBRTC" -echo " DevTools UI: enabled" -echo " Run as root: 
$RUN_AS_ROOT" -echo " Recordings: $SCRIPT_DIR/recordings" -echo "" - -echo "๐Ÿƒ Starting extended container with kernel-images run system..." - -# Backup original run-docker.sh to modify port mappings -if [ ! -f run-docker.sh.original ]; then - cp run-docker.sh run-docker.sh.original -fi - -# Create modified run script that adds DevTools port mapping -cat run-docker.sh.original | \ -sed 's/docker run -it/docker run -it -p 8001:8001/' > run-docker.sh.extended - -chmod +x run-docker.sh.extended - -# Run using the modified run script with DevTools port -./run-docker.sh.extended - -echo "" -echo "๐ŸŒ Extended service should be accessible at:" -echo " WebRTC Client: http://localhost:8080" -echo " Chrome DevTools: http://localhost:9222" -echo " Recording API: http://localhost:444" -echo " Enhanced DevTools UI: http://localhost:8001" \ No newline at end of file diff --git a/run-local.sh b/run-local.sh index cfab5c4..dc4cbe0 100755 --- a/run-local.sh +++ b/run-local.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash -# Local run wrapper for kernel-images chromium-headful +# Extended local run wrapper for kernel-images chromium-headful + DevTools set -e -o pipefail -echo "๐Ÿš€ Starting kernel-browser locally using kernel-images run system..." +echo "๐Ÿš€ Starting kernel-browser (EXTENDED) locally using kernel-images run system..." 
# Ensure we're in the right directory SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) @@ -25,9 +25,9 @@ cd kernel-images/images/chromium-headful # Make run script executable chmod +x run-docker.sh -# Set environment variables for local development -export IMAGE="kernel-browser:local" -export NAME="kernel-browser-local" +# Set environment variables for extended local development +export IMAGE="kernel-browser:extended" +export NAME="kernel-browser-extended" export ENABLE_WEBRTC="true" export RUN_AS_ROOT="false" @@ -35,24 +35,37 @@ export RUN_AS_ROOT="false" export UKC_TOKEN="dummy-token-for-local-run" export UKC_METRO="dummy-metro-for-local-run" -# Local-friendly Chrome flags (less restrictive than cloud) -export CHROMIUM_FLAGS="--user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --start-maximized --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox" +# Local-friendly Chrome flags (less restrictive than cloud) + custom DevTools frontend +export CHROMIUM_FLAGS="--user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --start-maximized --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --custom-devtools-frontend=http://localhost:8001/" echo "๐Ÿ”ง Configuration:" echo " Image: $IMAGE" echo " Container: $NAME" echo " WebRTC: $ENABLE_WEBRTC" +echo " DevTools UI: enabled" echo " Run as root: $RUN_AS_ROOT" echo " Recordings: $SCRIPT_DIR/recordings" echo "" -echo "๐Ÿƒ Starting container with kernel-images run system..." +echo "๐Ÿƒ Starting extended container with kernel-images run system..." -# Run using the official run script -./run-docker.sh +# Backup original run-docker.sh to modify port mappings +if [ ! 
-f run-docker.sh.original ]; then + cp run-docker.sh run-docker.sh.original +fi + +# Create modified run script that adds DevTools port mapping +cat run-docker.sh.original | \ +sed 's/docker run -it/docker run -it -p 8001:8001/' > run-docker.sh.extended + +chmod +x run-docker.sh.extended + +# Run using the modified run script with DevTools port +./run-docker.sh.extended echo "" -echo "๐ŸŒ Service should be accessible at:" -echo " WebRTC Client: http://localhost:8080" -echo " Chrome DevTools: http://localhost:9222" -echo " Recording API: http://localhost:444" \ No newline at end of file +echo "๐ŸŒ Extended service should be accessible at:" +echo " WebRTC Client: http://localhost:8080" +echo " Chrome DevTools: http://localhost:9222" +echo " Recording API: http://localhost:444" +echo " Enhanced DevTools UI: http://localhost:8001" \ No newline at end of file From 25522854f8a828a35bae1e2c9e22368faea36472 Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Mon, 8 Sep 2025 17:20:39 -0500 Subject: [PATCH 06/25] Using Twilio + CloudRun to deploy devtools frontend with Chromium --- .env.example | 17 ++ .gitignore | 54 ++++++ DEPLOYMENT.md | 166 +++++++++++++++++ Dockerfile.cloudrun | 10 +- cloudbuild.yaml | 37 +++- cloudrun-wrapper.sh | 69 ++++--- deploy.sh | 149 +++++++++++++++- service-secrets.yaml | 96 ++++++++++ service.yaml | 24 ++- supervisor/services-cloudrun/chromium.conf | 10 ++ supervisor/services-cloudrun/dbus.conf | 10 ++ .../services-cloudrun/devtools-frontend.conf | 11 ++ supervisor/services-cloudrun/neko.conf | 10 ++ supervisor/services-cloudrun/xorg.conf | 10 ++ twilio/README.md | 79 ++++++++ twilio/generate-twilio-credential.js | 42 +++++ twilio/test-twilio-api.sh | 54 ++++++ twilio/test-twilio-node.js | 62 +++++++ twilio/test-twilio-turn.js | 45 +++++ twilio/twilio-credential-updater.sh | 72 ++++++++ twilio/twilio-token-service.js | 168 ++++++++++++++++++ twilio/update-twilio-credentials.sh | 96 ++++++++++ twilio/verify-twilio.js | 84 +++++++++ 23 files 
changed, 1323 insertions(+), 52 deletions(-) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 DEPLOYMENT.md create mode 100644 service-secrets.yaml create mode 100644 supervisor/services-cloudrun/chromium.conf create mode 100644 supervisor/services-cloudrun/dbus.conf create mode 100644 supervisor/services-cloudrun/devtools-frontend.conf create mode 100644 supervisor/services-cloudrun/neko.conf create mode 100644 supervisor/services-cloudrun/xorg.conf create mode 100644 twilio/README.md create mode 100644 twilio/generate-twilio-credential.js create mode 100755 twilio/test-twilio-api.sh create mode 100644 twilio/test-twilio-node.js create mode 100644 twilio/test-twilio-turn.js create mode 100644 twilio/twilio-credential-updater.sh create mode 100644 twilio/twilio-token-service.js create mode 100755 twilio/update-twilio-credentials.sh create mode 100644 twilio/verify-twilio.js diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..268baa2 --- /dev/null +++ b/.env.example @@ -0,0 +1,17 @@ +# Twilio Network Traversal Service Credentials +# Get these from your Twilio Console: +# 1. Go to https://console.twilio.com/ +# 2. Navigate to Account > API Keys & Tokens +# 3. Create a new API Key +# 4. Use the SID as TWILIO_ACCOUNT_SID +# 5. 
Use the Secret as TWILIO_AUTH_TOKEN +TWILIO_ACCOUNT_SID=SK...your_api_key_sid_here +TWILIO_AUTH_TOKEN=your_api_key_secret_here + +# Optional: Google Cloud Configuration +# If not provided, will use current gcloud config +# PROJECT_ID=your-project-id +# REGION=us-central1 + +# Optional: Service Configuration +# SERVICE_NAME=kernel-browser \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d571dba --- /dev/null +++ b/.gitignore @@ -0,0 +1,54 @@ +# Environment variables +.env +.env.local +*.env +!.env.example + +# Node modules +node_modules/ + +# Build outputs +dist/ +build/ +out/ + +# Logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# OS files +.DS_Store +Thumbs.db + +# IDE files +.vscode/ +.idea/ +*.swp +*.swo + +# Temporary files +tmp/ +temp/ +*.tmp + +# Python +__pycache__/ +*.py[cod] +*$py.class +.Python +venv/ +env/ + +# Google Cloud +.gcloudignore +gcs-key.json +service-account-key.json + +# Docker +.dockerignore + +# Backup files +*.bak +*.backup \ No newline at end of file diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000..b029426 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,166 @@ +# Kernel Browser - Cloud Run Deployment Guide + +This guide explains how to deploy the Kernel Browser to Google Cloud Run with secure Twilio credential management. + +## Prerequisites + +- Google Cloud SDK (`gcloud`) installed +- Docker installed +- Git installed +- A Google Cloud Project with billing enabled +- Twilio account with API credentials (for WebRTC TURN servers) + +## Quick Start + +### 1. Clone the repository +```bash +git clone +cd browser-web-agent +git submodule update --init --recursive +``` + +### 2. 
Set up Twilio credentials +```bash +# Copy the example environment file +cp .env.example .env + +# Edit .env and add your Twilio credentials +# Get these from https://console.twilio.com/ > Account > API Keys & Tokens +``` + +Your `.env` file should contain: +``` +TWILIO_ACCOUNT_SID=SK...your_api_key_sid_here +TWILIO_AUTH_TOKEN=your_api_key_secret_here +``` + +### 3. Deploy to Cloud Run +```bash +./deploy.sh +``` + +The script will: +- Load credentials from `.env` +- Create/update secrets in Google Secret Manager +- Build and deploy the container to Cloud Run +- Configure all necessary permissions + +## Deployment Options + +### Using Cloud Build (recommended) +```bash +./deploy.sh +``` + +### Using local Docker build +```bash +./deploy.sh --local +``` + +### Specify project and region +```bash +./deploy.sh --project YOUR_PROJECT_ID --region us-central1 +``` + +## How It Works + +### Credential Management + +1. **Local Development**: Credentials are stored in `.env` file (gitignored) +2. **Secret Manager**: Deploy script automatically creates/updates secrets in Google Secret Manager +3. **Cloud Run**: Service uses `secretKeyRef` to securely access credentials at runtime +4. **Dynamic TURN**: Container fetches fresh TURN credentials from Twilio on startup + +### Security Features + +- Credentials never appear in code or logs +- Secrets are encrypted at rest and in transit +- Service account has minimal required permissions +- Automatic credential rotation support + +### Files Overview + +- `.env.example` - Template for environment variables +- `.env` - Your local credentials (gitignored) +- `deploy.sh` - Main deployment script with Secret Manager integration +- `service-secrets.yaml` - Cloud Run config with secret references +- `service.yaml` - Fallback config (for deployments without secrets) +- `cloudbuild.yaml` - Cloud Build configuration +- `twilio/` - Twilio credential management scripts + +## Updating Credentials + +To update Twilio credentials: + +1. 
Update `.env` with new credentials +2. Run `./deploy.sh` again +3. Script will update secrets and redeploy + +## Manual Secret Management + +If you need to manage secrets manually: + +```bash +# Create secrets +echo -n "YOUR_SID" | gcloud secrets create twilio-account-sid --data-file=- +echo -n "YOUR_TOKEN" | gcloud secrets create twilio-auth-token --data-file=- + +# Update secrets +echo -n "NEW_SID" | gcloud secrets versions add twilio-account-sid --data-file=- +echo -n "NEW_TOKEN" | gcloud secrets versions add twilio-auth-token --data-file=- + +# Grant access to service account +gcloud secrets add-iam-policy-binding twilio-account-sid \ + --member="serviceAccount:kernel-browser-sa@PROJECT_ID.iam.gserviceaccount.com" \ + --role="roles/secretmanager.secretAccessor" +``` + +## Service Endpoints + +After deployment, you'll have access to: + +- **Main Interface**: `https://SERVICE_URL/` +- **WebRTC Client**: `https://SERVICE_URL/` +- **Chrome DevTools**: `https://SERVICE_URL/devtools/` +- **DevTools WebSocket**: `wss://SERVICE_URL/cdp/ws` +- **Recording API**: `https://SERVICE_URL/api` +- **Health Check**: `https://SERVICE_URL/health` + +## Troubleshooting + +### Deployment fails +- Check that all prerequisites are installed +- Ensure billing is enabled on your GCP project +- Verify you have sufficient quota in your region + +### WebRTC not working +- Ensure Twilio credentials are correct +- Check Cloud Run logs: `gcloud run services logs read kernel-browser --region=us-central1` +- Verify TURN servers are accessible from your network + +### Secrets not found +- Run `gcloud secrets list` to verify secrets exist +- Check service account permissions +- Ensure Secret Manager API is enabled + +## Architecture + +``` +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ Client โ”‚โ”€โ”€โ”€โ”€โ–ถโ”‚ Cloud Run โ”‚โ”€โ”€โ”€โ”€โ–ถโ”‚ Secret Manager โ”‚ +โ”‚ 
(Browser) โ”‚ โ”‚ (Container) โ”‚ โ”‚ (Credentials) โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ”‚ + โ–ผ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ Twilio API โ”‚ + โ”‚ (TURN Servers) โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +## Support + +For issues or questions: +- Check logs: `gcloud run services logs read kernel-browser --region=us-central1` +- Review service status: `gcloud run services describe kernel-browser --region=us-central1` +- File an issue on GitHub \ No newline at end of file diff --git a/Dockerfile.cloudrun b/Dockerfile.cloudrun index 78776be..91ca6c7 100644 --- a/Dockerfile.cloudrun +++ b/Dockerfile.cloudrun @@ -1,6 +1,9 @@ # DevTools Frontend build stage using browser-operator-core FROM --platform=linux/amd64 ubuntu:22.04 AS devtools-builder +# Cache bust argument to force rebuilds +ARG CACHE_BUST + # Install required packages for DevTools frontend build RUN apt-get update && apt-get install -y \ curl \ @@ -50,7 +53,7 @@ RUN git checkout upstream/main RUN npm run build # Multi-stage build using kernel-images as base -FROM docker.io/golang:1.23.0 AS server-builder +FROM docker.io/golang:1.25.0 AS server-builder WORKDIR /workspace/server ARG TARGETOS @@ -253,10 +256,11 @@ COPY --from=devtools-builder /workspace/devtools/devtools-frontend/out/Default/g # Set permissions for DevTools files RUN chown -R kernel:kernel /usr/share/nginx/devtools -# Cloud Run specific: wrapper script only (nginx config is inline) +# Cloud Run specific: wrapper scripts (nginx config is inline) # DO NOT copy nginx.conf to avoid auto-start conflicts COPY cloudrun-wrapper.sh /cloudrun-wrapper.sh -RUN chmod +x /cloudrun-wrapper.sh +COPY twilio/twilio-credential-updater.sh /twilio-credential-updater.sh +RUN chmod +x /cloudrun-wrapper.sh /twilio-credential-updater.sh # Add essential services for 
neko WebRTC and Chromium COPY supervisor/services-cloudrun/dbus.conf /etc/supervisor/conf.d/services-cloudrun/dbus.conf diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 3470f89..d178da5 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -28,6 +28,8 @@ steps: - 'Dockerfile.cloudrun' - '--cache-from' - 'us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest' + - '--build-arg' + - 'CACHE_BUST=$BUILD_ID' - '--tag' - 'us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest' - '.' @@ -39,24 +41,41 @@ steps: - 'push' - 'us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest' - # Step 5: Update the service.yaml with the correct project ID + # Step 5: Choose appropriate service.yaml based on secrets availability - name: 'gcr.io/cloud-builders/gcloud' entrypoint: 'bash' args: - '-c' - | - sed -i "s/PROJECT_ID/$PROJECT_ID/g" service.yaml - cat service.yaml + # Check if Twilio secrets exist + if gcloud secrets describe twilio-account-sid --project=$PROJECT_ID >/dev/null 2>&1 && \ + gcloud secrets describe twilio-auth-token --project=$PROJECT_ID >/dev/null 2>&1; then + echo "Using service-secrets.yaml with Secret Manager references" + SERVICE_YAML="service-secrets.yaml" + else + echo "Using standard service.yaml (secrets not configured)" + SERVICE_YAML="service.yaml" + fi + + # Update project ID in the chosen service file + sed -i "s/PROJECT_ID/$PROJECT_ID/g" $SERVICE_YAML + + echo "Deploying with: $SERVICE_YAML" + cat $SERVICE_YAML + + # Save the choice for next step + echo $SERVICE_YAML > /workspace/service_choice.txt # Step 6: Deploy to Cloud Run - name: 'gcr.io/cloud-builders/gcloud' + entrypoint: 'bash' args: - - 'run' - - 'services' - - 'replace' - - 'service.yaml' - - '--region=us-central1' - - '--quiet' + - '-c' + - | + SERVICE_YAML=$(cat /workspace/service_choice.txt) + gcloud run services replace $SERVICE_YAML \ + --region=us-central1 \ + --quiet # Step 7: Update traffic to latest revision - name: 'gcr.io/cloud-builders/gcloud' diff --git 
a/cloudrun-wrapper.sh b/cloudrun-wrapper.sh index 8e91d01..c49c995 100644 --- a/cloudrun-wrapper.sh +++ b/cloudrun-wrapper.sh @@ -13,6 +13,14 @@ export HEIGHT=768 export WIDTH=1024 export NEKO_BIND=:8081 +# Get fresh Twilio TURN credentials if available +if [ -f /twilio-credential-updater.sh ]; then + echo "[cloudrun-wrapper] Getting fresh Twilio TURN credentials..." + source /twilio-credential-updater.sh +else + echo "[cloudrun-wrapper] Twilio updater not found, using credentials from environment" +fi + # Port configuration for Cloud Run export PORT=${PORT:-8080} export CHROMIUM_FLAGS="${CHROMIUM_FLAGS:---user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor --custom-devtools-frontend=http://localhost:8001/ https://www.google.com}" @@ -26,27 +34,8 @@ mkdir -p /tmp/nginx_client_temp /tmp/nginx_proxy_temp /tmp/nginx_fastcgi_temp \ /tmp/runtime-kernel /var/log/neko /tmp/recordings \ /tmp/supervisord /tmp/dbus -# Skip nginx test - supervisor will handle nginx startup -echo "[cloudrun-wrapper] Skipping nginx test - supervisor manages nginx" - -# Start supervisor for kernel-images services in background -echo "[cloudrun-wrapper] Starting kernel-images services..." -supervisord -c /etc/supervisor/supervisord-cloudrun.conf -n & -SUPERVISOR_PID=$! - -# Wait a moment for services to start -sleep 5 - -# Cleanup function -cleanup() { - echo "[cloudrun-wrapper] Cleaning up..." 
- kill $SUPERVISOR_PID 2>/dev/null || true - supervisorctl -c /etc/supervisor/supervisord-cloudrun.conf stop all 2>/dev/null || true -} -trap cleanup TERM INT - -# Start nginx proxy on Cloud Run port (proxies directly to services) -echo "[cloudrun-wrapper] Starting nginx proxy on port $PORT" +# Start nginx immediately in background to respond to CloudRun health checks +echo "[cloudrun-wrapper] Starting nginx proxy on port $PORT (background)" # Create nginx config file cat > /tmp/nginx.conf < /dev/null 2>&1; then + echo "[cloudrun-wrapper] Neko service is ready" + break + fi + if [ $i -eq 60 ]; then + echo "[cloudrun-wrapper] Warning: Neko service not ready after 60 seconds, starting nginx anyway" + fi + sleep 1 +done + +# Start nginx in foreground (required for Cloud Run) +echo "[cloudrun-wrapper] Starting nginx proxy on port $PORT" exec nginx -g "daemon off;" -c /tmp/nginx.conf \ No newline at end of file diff --git a/deploy.sh b/deploy.sh index 3d7f4b1..50527e1 100755 --- a/deploy.sh +++ b/deploy.sh @@ -34,6 +34,127 @@ info() { echo -e "โ„น๏ธ $1" } +# Load environment variables from .env file +load_env_file() { + if [ -f .env ]; then + info "Loading configuration from .env file..." + # Export variables from .env, ignoring comments and empty lines + set -a + source <(grep -v '^#' .env | grep -v '^$') + set +a + success "Configuration loaded from .env" + elif [ -f .env.example ]; then + warning "No .env file found. Copy .env.example to .env and add your credentials" + info "Run: cp .env.example .env" + fi +} + +# Validate Twilio credentials +validate_twilio_credentials() { + if [ -z "${TWILIO_ACCOUNT_SID:-}" ] || [ -z "${TWILIO_AUTH_TOKEN:-}" ]; then + warning "Twilio credentials not found in environment" + echo "You can either:" + echo " 1. Add them to .env file (recommended)" + echo " 2. Enter them now (temporary)" + echo " 3. 
Skip (WebRTC may not work properly)" + echo + read -p "Enter choice [1/2/3]: " choice + + case $choice in + 1) + info "Please add credentials to .env file and re-run the script" + exit 0 + ;; + 2) + read -p "Enter Twilio Account SID: " TWILIO_ACCOUNT_SID + read -s -p "Enter Twilio Auth Token: " TWILIO_AUTH_TOKEN + echo + ;; + 3) + warning "Skipping Twilio configuration" + return 1 + ;; + *) + error "Invalid choice" + ;; + esac + fi + + if [ -n "${TWILIO_ACCOUNT_SID:-}" ] && [ -n "${TWILIO_AUTH_TOKEN:-}" ]; then + # Basic validation of credential format + if [[ ! "$TWILIO_ACCOUNT_SID" =~ ^SK[a-f0-9]{32}$ ]]; then + warning "Twilio Account SID format looks incorrect (should start with SK and be 34 chars)" + fi + success "Twilio credentials configured" + return 0 + else + return 1 + fi +} + +# Setup Google Secret Manager secrets +setup_secrets() { + if ! validate_twilio_credentials; then + warning "Skipping Secret Manager setup - no Twilio credentials provided" + return 0 + fi + + info "Setting up Google Secret Manager secrets..." + + # Enable Secret Manager API if not already enabled + info "Enabling Secret Manager API..." 
+ gcloud services enable secretmanager.googleapis.com --project="$PROJECT_ID" --quiet + + # Create or update twilio-account-sid secret + if gcloud secrets describe twilio-account-sid --project="$PROJECT_ID" &>/dev/null; then + echo -n "$TWILIO_ACCOUNT_SID" | gcloud secrets versions add twilio-account-sid \ + --data-file=- \ + --project="$PROJECT_ID" + info "Updated twilio-account-sid secret" + else + echo -n "$TWILIO_ACCOUNT_SID" | gcloud secrets create twilio-account-sid \ + --data-file=- \ + --project="$PROJECT_ID" \ + --replication-policy="automatic" + success "Created twilio-account-sid secret" + fi + + # Create or update twilio-auth-token secret + if gcloud secrets describe twilio-auth-token --project="$PROJECT_ID" &>/dev/null; then + echo -n "$TWILIO_AUTH_TOKEN" | gcloud secrets versions add twilio-auth-token \ + --data-file=- \ + --project="$PROJECT_ID" + info "Updated twilio-auth-token secret" + else + echo -n "$TWILIO_AUTH_TOKEN" | gcloud secrets create twilio-auth-token \ + --data-file=- \ + --project="$PROJECT_ID" \ + --replication-policy="automatic" + success "Created twilio-auth-token secret" + fi + + # Grant access to service account + local sa_email="kernel-browser-sa@${PROJECT_ID}.iam.gserviceaccount.com" + + info "Granting Secret Manager access to service account..." + gcloud secrets add-iam-policy-binding twilio-account-sid \ + --member="serviceAccount:$sa_email" \ + --role="roles/secretmanager.secretAccessor" \ + --project="$PROJECT_ID" \ + --quiet + + gcloud secrets add-iam-policy-binding twilio-auth-token \ + --member="serviceAccount:$sa_email" \ + --role="roles/secretmanager.secretAccessor" \ + --project="$PROJECT_ID" \ + --quiet + + # Set flag to use secrets-enabled service.yaml + export USE_SECRETS=true + + success "Secret Manager configured with Twilio credentials" +} + # Check prerequisites check_prerequisites() { info "Checking prerequisites..." 
@@ -85,6 +206,7 @@ enable_apis() { "containerregistry.googleapis.com" "compute.googleapis.com" "storage.googleapis.com" + "secretmanager.googleapis.com" ) for api in "${apis[@]}"; do @@ -175,16 +297,29 @@ deploy_local() { info "Deploying to Cloud Run..." - # Update service.yaml with project ID and image - sed -i.bak "s/PROJECT_ID/$PROJECT_ID/g" service.yaml - sed -i.bak "s|gcr.io/PROJECT_ID/kernel-browser:latest|$image_name|g" service.yaml + # Choose appropriate service.yaml based on secrets availability + local service_file="service.yaml" + if [ "${USE_SECRETS:-false}" = "true" ]; then + if gcloud secrets describe twilio-account-sid --project="$PROJECT_ID" &>/dev/null && \ + gcloud secrets describe twilio-auth-token --project="$PROJECT_ID" &>/dev/null; then + service_file="service-secrets.yaml" + info "Using service-secrets.yaml with Secret Manager references" + else + warning "Secrets not found, falling back to standard service.yaml" + fi + fi + + # Update service file with project ID and image + cp "$service_file" "${service_file}.tmp" + sed -i.bak "s/PROJECT_ID/$PROJECT_ID/g" "${service_file}.tmp" + sed -i.bak "s|us-docker.pkg.dev/func-241017/gcr.io/kernel-browser:latest|$image_name|g" "${service_file}.tmp" - gcloud run services replace service.yaml \ + gcloud run services replace "${service_file}.tmp" \ --region="$REGION" \ --project="$PROJECT_ID" - # Restore original service.yaml - mv service.yaml.bak service.yaml + # Clean up temporary files + rm -f "${service_file}.tmp" "${service_file}.tmp.bak" success "Local build and deployment completed" } @@ -255,9 +390,11 @@ main() { done check_prerequisites + load_env_file setup_project enable_apis create_service_account + setup_secrets update_submodules if [ "${LOCAL_BUILD:-false}" = "true" ]; then diff --git a/service-secrets.yaml b/service-secrets.yaml new file mode 100644 index 0000000..34b4b91 --- /dev/null +++ b/service-secrets.yaml @@ -0,0 +1,96 @@ +apiVersion: serving.knative.dev/v1 +kind: Service +metadata: 
+ name: kernel-browser + annotations: + run.googleapis.com/ingress: all + run.googleapis.com/ingress-status: all +spec: + template: + metadata: + annotations: + # Use second generation execution environment + run.googleapis.com/execution-environment: gen2 + # Disable CPU throttling for consistent performance + run.googleapis.com/cpu-throttling: "false" + # Increase startup timeout to 10 minutes for complex service startup + run.googleapis.com/timeout: "600" + # Auto-scaling settings + autoscaling.knative.dev/minScale: "1" + autoscaling.knative.dev/maxScale: "2" + spec: + # Allow multiple concurrent requests (browser can handle multiple tabs/requests) + containerConcurrency: 10 + # 1 hour timeout for long browser sessions + timeoutSeconds: 3600 + # Service account for GCP access + serviceAccountName: kernel-browser-sa + containers: + - name: kernel-browser + # This will be set during deployment + image: us-docker.pkg.dev/func-241017/gcr.io/kernel-browser:latest + ports: + - name: http1 + containerPort: 8080 + resources: + limits: + # 2 CPU cores (within quota limits) + cpu: "2" + # 4GiB memory (within quota limits) + memory: "4Gi" + requests: + cpu: "1" + memory: "2Gi" + env: + # Enable WebRTC for live viewing + - name: ENABLE_WEBRTC + value: "true" + # Run as non-root user (Cloud Run requirement) + - name: RUN_AS_ROOT + value: "false" + # Chrome optimizations for Cloud Run + - name: CHROMIUM_FLAGS + value: "--user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor" + # Display configuration + - name: DISPLAY_NUM + value: "1" + - name: HEIGHT + value: "768" + - name: WIDTH + value: "1024" + # Twilio API Key credentials from Secret Manager + - name: TWILIO_ACCOUNT_SID + valueFrom: + secretKeyRef: + name: twilio-account-sid + key: latest + - name: TWILIO_AUTH_TOKEN + valueFrom: + secretKeyRef: + name: 
twilio-auth-token + key: latest + # Dynamic TURN credentials will be generated using above secrets + # The twilio-credential-updater.sh script will use these at startup + - name: NEKO_ICESERVERS + value: 'DYNAMIC' # Placeholder - will be replaced by twilio-credential-updater.sh + # Disable TCP multiplexing (nginx handles port 8080) + - name: NEKO_WEBRTC_TCPMUX + value: "0" + # Optional: Google Cloud Storage bucket for recordings + - name: GCS_BUCKET + value: "kernel-browser-recordings" + # API configuration + - name: KERNEL_IMAGES_API_PORT + value: "10001" + - name: KERNEL_IMAGES_API_FRAME_RATE + value: "10" + - name: KERNEL_IMAGES_API_MAX_SIZE_MB + value: "500" + - name: KERNEL_IMAGES_API_OUTPUT_DIR + value: "/tmp/recordings" + # Force new revision + - name: DEPLOYMENT_VERSION + value: "v12-secret-manager" + traffic: + - percent: 100 + latestRevision: true \ No newline at end of file diff --git a/service.yaml b/service.yaml index 8f59516..f39c167 100644 --- a/service.yaml +++ b/service.yaml @@ -13,8 +13,8 @@ spec: run.googleapis.com/execution-environment: gen2 # Disable CPU throttling for consistent performance run.googleapis.com/cpu-throttling: "false" - # Increase startup timeout - run.googleapis.com/timeout: "3600" + # Increase startup timeout to 10 minutes for complex service startup + run.googleapis.com/timeout: "600" # Auto-scaling settings autoscaling.knative.dev/minScale: "1" autoscaling.knative.dev/maxScale: "2" @@ -58,14 +58,17 @@ spec: value: "768" - name: WIDTH value: "1024" - # ICE servers configuration for WebRTC (includes both STUN and TURN) + # Twilio API Key credentials for dynamic TURN generation + - name: TWILIO_ACCOUNT_SID + value: "SK5346918f48275d6571be927e84cfd6f8" + - name: TWILIO_AUTH_TOKEN + value: "OWJDRGxZZnxUlwOVXbupRs9yhQaylXzo" + # Fresh Twilio TURN credentials (manually generated for now) - name: NEKO_ICESERVERS - value: 
'[{"urls":["stun:global.stun.twilio.com:3478"]},{"urls":["turn:global.turn.twilio.com:3478?transport=udp"],"username":"464cefa09d5a8b4030b34b3faf15871b5efe0eef8331e9324f3f4f9144158ada","credential":"1Fm/UdpnNFbvfDPBtETUSZ4BhQsi0cubgLBdbScluPs="}]' - # WebRTC configuration - - name: NEKO_WEBRTC_TCPPORT - value: "8081" - - name: NEKO_WEBRTC_UDPPORT - value: "8082" + value: '[{"urls":["turn:global.turn.twilio.com:3478?transport=tcp"],"username":"b88cfa1369190aa9cbc8bfaca683c457476b5d7062aa0a7b184c87db3ade0ff5","credential":"m3oPEt94gQQP+g2yd4R32MuZtCCdw6Rmmuvkp6/Dkd0="},{"urls":["turn:global.turn.twilio.com:443?transport=tcp"],"username":"b88cfa1369190aa9cbc8bfaca683c457476b5d7062aa0a7b184c87db3ade0ff5","credential":"m3oPEt94gQQP+g2yd4R32MuZtCCdw6Rmmuvkp6/Dkd0="}]' + # Disable TCP multiplexing (nginx handles port 8080) + - name: NEKO_WEBRTC_TCPMUX + value: "0" # Optional: Google Cloud Storage bucket for recordings - name: GCS_BUCKET value: "kernel-browser-recordings" @@ -78,6 +81,9 @@ spec: value: "500" - name: KERNEL_IMAGES_API_OUTPUT_DIR value: "/tmp/recordings" + # Force new revision + - name: DEPLOYMENT_VERSION + value: "v11-fresh-twilio-credentials" traffic: - percent: 100 latestRevision: true \ No newline at end of file diff --git a/supervisor/services-cloudrun/chromium.conf b/supervisor/services-cloudrun/chromium.conf new file mode 100644 index 0000000..6fdc548 --- /dev/null +++ b/supervisor/services-cloudrun/chromium.conf @@ -0,0 +1,10 @@ +[program:chromium] +command=/bin/bash -lc 'sleep 3 && DISPLAY=":1" DBUS_SESSION_BUS_ADDRESS="unix:path=/tmp/dbus/session_bus_socket" chromium --remote-debugging-port=9223 --remote-allow-origins=* --user-data-dir=/home/kernel/user-data --password-store=basic --no-first-run --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor --custom-devtools-frontend=http://localhost:8001/ https://www.google.com' +autostart=true 
+autorestart=true +startsecs=8 +priority=20 +stdout_logfile=/var/log/supervisord/chromium/chromium.log +stdout_logfile_maxbytes=50MB +redirect_stderr=true +environment=HOME="/home/kernel",USER="kernel",DISPLAY=":1",DBUS_SESSION_BUS_ADDRESS="unix:path=/tmp/dbus/session_bus_socket" \ No newline at end of file diff --git a/supervisor/services-cloudrun/dbus.conf b/supervisor/services-cloudrun/dbus.conf new file mode 100644 index 0000000..beb4cda --- /dev/null +++ b/supervisor/services-cloudrun/dbus.conf @@ -0,0 +1,10 @@ +[program:dbus] +command=/bin/bash -lc 'mkdir -p /tmp/dbus && dbus-uuidgen --ensure && dbus-daemon --session --address=unix:path=/tmp/dbus/session_bus_socket --nopidfile --nosyslog --nofork' +autostart=true +autorestart=true +startsecs=2 +priority=1 +stdout_logfile=/var/log/supervisord/dbus/dbus.log +stdout_logfile_maxbytes=50MB +redirect_stderr=true +environment=HOME="/home/kernel",USER="kernel",DBUS_SESSION_BUS_ADDRESS="unix:path=/tmp/dbus/session_bus_socket" \ No newline at end of file diff --git a/supervisor/services-cloudrun/devtools-frontend.conf b/supervisor/services-cloudrun/devtools-frontend.conf new file mode 100644 index 0000000..b0855ed --- /dev/null +++ b/supervisor/services-cloudrun/devtools-frontend.conf @@ -0,0 +1,11 @@ +[program:devtools-frontend] +command=/bin/bash -c 'cd /usr/share/nginx/devtools && python3 -m http.server 8001' +autostart=true +autorestart=true +startsecs=5 +priority=20 +stdout_logfile=/var/log/supervisord/devtools-frontend/devtools-frontend.log +stdout_logfile_maxbytes=50MB +redirect_stderr=true +environment=HOME="/home/kernel",USER="kernel" +user=kernel \ No newline at end of file diff --git a/supervisor/services-cloudrun/neko.conf b/supervisor/services-cloudrun/neko.conf new file mode 100644 index 0000000..969b62d --- /dev/null +++ b/supervisor/services-cloudrun/neko.conf @@ -0,0 +1,10 @@ +[program:neko] +command=/usr/bin/neko serve --server.static /var/www --server.bind 0.0.0.0:8081 +autostart=true 
+autorestart=true +startsecs=5 +priority=15 +stdout_logfile=/var/log/supervisord/neko/neko.log +stdout_logfile_maxbytes=50MB +redirect_stderr=true +environment=HOME="/home/kernel",USER="kernel",DISPLAY=":1",NEKO_WEBRTC_ICESERVERS_FRONTEND="",NEKO_WEBRTC_ICESERVERS_BACKEND="" \ No newline at end of file diff --git a/supervisor/services-cloudrun/xorg.conf b/supervisor/services-cloudrun/xorg.conf new file mode 100644 index 0000000..243c8f5 --- /dev/null +++ b/supervisor/services-cloudrun/xorg.conf @@ -0,0 +1,10 @@ +[program:xorg] +command=/usr/bin/Xorg :1 -config /etc/neko/xorg.conf -noreset -nolisten tcp +autostart=true +autorestart=true +startsecs=2 +priority=2 +stdout_logfile=/var/log/supervisord/xorg/xorg.log +stdout_logfile_maxbytes=50MB +redirect_stderr=true +environment=HOME="/home/kernel",USER="kernel" \ No newline at end of file diff --git a/twilio/README.md b/twilio/README.md new file mode 100644 index 0000000..3b77d32 --- /dev/null +++ b/twilio/README.md @@ -0,0 +1,79 @@ +# Twilio TURN Server Integration + +This folder contains scripts for integrating Twilio's Network Traversal Service to provide TURN server credentials for WebRTC in Cloud Run. 
+ +## Scripts + +### `twilio-credential-updater.sh` +- **Purpose**: Called by `cloudrun-wrapper.sh` on container startup +- **Function**: Fetches fresh TURN credentials from Twilio API +- **Fallback**: Uses free TURN servers if Twilio fails +- **Environment Variables Required**: + - `TWILIO_ACCOUNT_SID` (API Key SID) + - `TWILIO_AUTH_TOKEN` (API Key Secret) + +### `twilio-token-service.js` +- **Purpose**: Node.js service for TURN credential generation +- **Features**: + - HTTP server mode (`--server` flag) + - One-time credential generation (default) + - Credential caching (1 hour) +- **Dependencies**: Express.js (for server mode) + +### `test-twilio-api.sh` +- **Purpose**: Test Twilio Network Traversal Service API +- **Usage**: `TWILIO_ACCOUNT_SID=xxx TWILIO_AUTH_TOKEN=xxx ./test-twilio-api.sh` +- **Output**: Formatted credentials for `NEKO_ICESERVERS` + +### `test-twilio-node.js` +- **Purpose**: Simple Node.js test for Twilio API +- **Usage**: Node.js version of the API test +- **Dependencies**: Only Node.js built-ins + +### `update-twilio-credentials.sh` +- **Purpose**: Update running Cloud Run service with fresh credentials +- **Usage**: Run periodically to refresh credentials +- **Features**: Direct Cloud Run service update + +## Integration + +The main integration point is in `../cloudrun-wrapper.sh`: + +```bash +# Get fresh Twilio TURN credentials if available +if [ -f /twilio-credential-updater.sh ]; then + echo "[cloudrun-wrapper] Getting fresh Twilio TURN credentials..." 
+ source /twilio-credential-updater.sh +else + echo "[cloudrun-wrapper] Twilio updater not found, using credentials from environment" +fi +``` + +## Credentials Format + +Twilio Network Traversal Service returns credentials in this format: + +```json +{ + "ice_servers": [ + { + "url": "turn:global.turn.twilio.com:3478?transport=tcp", + "username": "long-generated-username", + "credential": "base64-encoded-credential" + } + ], + "ttl": "86400" +} +``` + +These are converted to neko format: + +```json +[ + { + "urls": ["turn:global.turn.twilio.com:3478?transport=tcp"], + "username": "long-generated-username", + "credential": "base64-encoded-credential" + } +] +``` \ No newline at end of file diff --git a/twilio/generate-twilio-credential.js b/twilio/generate-twilio-credential.js new file mode 100644 index 0000000..fcc384d --- /dev/null +++ b/twilio/generate-twilio-credential.js @@ -0,0 +1,42 @@ +#!/usr/bin/env node + +const crypto = require('crypto'); + +// Twilio API credentials +const API_KEY_SID = 'SK5346918f48275d6571be927e84cfd6f8'; +const API_KEY_SECRET = process.env.TWILIO_API_KEY_SECRET || 'YOUR_API_KEY_SECRET_HERE'; + +// Time to live (in seconds) - 24 hours +const ttl = 86400; + +// Calculate expiration timestamp +const unixTimestamp = Math.floor(Date.now() / 1000) + ttl; + +// Create username (timestamp:apiKeySid) +const username = `${unixTimestamp}:${API_KEY_SID}`; + +// Generate password using HMAC-SHA1 +const password = crypto + .createHmac('sha1', API_KEY_SECRET) + .update(username) + .digest('base64'); + +console.log('Twilio TURN Credential Generator'); +console.log('=' .repeat(60)); +console.log('\nConfiguration:'); +console.log(`API Key SID: ${API_KEY_SID}`); +console.log(`API Key Secret: ${API_KEY_SECRET === 'YOUR_API_KEY_SECRET_HERE' ? 
'[NOT SET - Please provide]' : '[HIDDEN]'}`); +console.log(`TTL: ${ttl} seconds (${ttl/3600} hours)`); +console.log('\nGenerated Credentials:'); +console.log(`Username: ${username}`); +console.log(`Password: ${password}`); +console.log(`\nExpires at: ${new Date(unixTimestamp * 1000).toISOString()}`); + +console.log('\nFor service.yaml, use:'); +console.log(`- name: NEKO_ICESERVERS`); +console.log(` value: '[{"urls": ["turn:global.turn.twilio.com:3478?transport=tcp", "turns:global.turn.twilio.com:5349?transport=tcp"], "username": "${username}", "credential": "${password}"}]'`); + +if (API_KEY_SECRET === 'YOUR_API_KEY_SECRET_HERE') { + console.log('\nโš ๏ธ WARNING: You need to set the actual API Key Secret!'); + console.log('Run with: TWILIO_API_KEY_SECRET=your_actual_secret node generate-twilio-credential.js'); +} \ No newline at end of file diff --git a/twilio/test-twilio-api.sh b/twilio/test-twilio-api.sh new file mode 100755 index 0000000..a07a03d --- /dev/null +++ b/twilio/test-twilio-api.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +# Test Twilio Network Traversal Service API +# This generates temporary TURN credentials + +ACCOUNT_SID="${TWILIO_ACCOUNT_SID:-YOUR_ACCOUNT_SID}" +AUTH_TOKEN="${TWILIO_AUTH_TOKEN:-YOUR_AUTH_TOKEN}" + +echo "Testing Twilio Network Traversal Service API" +echo "============================================" +echo "Account SID: $ACCOUNT_SID" +echo "" + +# Make API call to get TURN credentials +echo "Requesting TURN credentials from Twilio..." +echo "" + +response=$(curl -s -X POST \ + "https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Tokens.json" \ + -u "${ACCOUNT_SID}:${AUTH_TOKEN}") + +# Check if request was successful +if echo "$response" | grep -q "ice_servers"; then + echo "โœ… Success! 
Received TURN credentials:" + echo "$response" | python3 -m json.tool + + # Extract and format for service.yaml + echo "" + echo "Formatted for NEKO_ICESERVERS:" + echo "$response" | python3 -c " +import json +import sys +data = json.load(sys.stdin) +servers = [] +for server in data.get('ice_servers', []): + if server.get('url', '').startswith('turn'): + url = server['url'] + if 'transport=' not in url: + url += '?transport=tcp' + servers.append({ + 'urls': [url], + 'username': server.get('username', ''), + 'credential': server.get('credential', '') + }) +print(json.dumps(servers)) +" +else + echo "โŒ Failed to get TURN credentials" + echo "Response: $response" + echo "" + echo "Make sure you have:" + echo "1. Valid Twilio Account SID and Auth Token" + echo "2. Network Traversal Service enabled on your Twilio account" +fi \ No newline at end of file diff --git a/twilio/test-twilio-node.js b/twilio/test-twilio-node.js new file mode 100644 index 0000000..da67c9a --- /dev/null +++ b/twilio/test-twilio-node.js @@ -0,0 +1,62 @@ +#!/usr/bin/env node + +const https = require('https'); + +const ACCOUNT_SID = 'SK5346918f48275d6571be927e84cfd6f8'; +const AUTH_TOKEN = 'OWJDRGxZZnxUlwOVXbupRs9yhQaylXzo'; + +function getTwilioTurnCredentials() { + return new Promise((resolve, reject) => { + console.log('Fetching TURN credentials from Twilio...'); + + const auth = Buffer.from(`${ACCOUNT_SID}:${AUTH_TOKEN}`).toString('base64'); + + const options = { + hostname: 'api.twilio.com', + port: 443, + path: `/2010-04-01/Accounts/${ACCOUNT_SID}/Tokens.json`, + method: 'POST', + headers: { + 'Authorization': `Basic ${auth}`, + 'Content-Type': 'application/x-www-form-urlencoded', + 'Content-Length': 0 + } + }; + + const req = https.request(options, (res) => { + let data = ''; + res.on('data', (chunk) => data += chunk); + res.on('end', () => { + try { + const response = JSON.parse(data); + if (res.statusCode === 201 || res.statusCode === 200) { + console.log('โœ… Success!'); + 
resolve(response.ice_servers || []); + } else { + reject(new Error(`API error: ${response.message}`)); + } + } catch (error) { + reject(error); + } + }); + }); + + req.on('error', reject); + req.end(); + }); +} + +getTwilioTurnCredentials() + .then(servers => { + const nekoServers = servers + .filter(s => s.url && s.url.startsWith('turn')) + .map(s => ({ + urls: [s.url], + username: s.username, + credential: s.credential + })); + + console.log('\nFormatted for NEKO_ICESERVERS:'); + console.log(JSON.stringify(nekoServers)); + }) + .catch(console.error); \ No newline at end of file diff --git a/twilio/test-twilio-turn.js b/twilio/test-twilio-turn.js new file mode 100644 index 0000000..58782d0 --- /dev/null +++ b/twilio/test-twilio-turn.js @@ -0,0 +1,45 @@ +#!/usr/bin/env node + +// Test Twilio TURN server credentials +const crypto = require('crypto'); + +// Parse the credentials from service.yaml +const username = "1757273052:SK5346918f48275d6571be927e84cfd6f8"; +const credential = "12HiXDndTPnUQZorm6TDDHd9Co8="; + +console.log("Testing Twilio TURN credentials:"); +console.log("Username:", username); +console.log("Credential:", credential); + +// Extract timestamp from username +const parts = username.split(':'); +const timestamp = parseInt(parts[0]); +const apiKeySid = parts[1]; + +console.log("\nParsed values:"); +console.log("Timestamp:", timestamp); +console.log("API Key SID:", apiKeySid); + +// Check if timestamp is valid (not expired) +const now = Math.floor(Date.now() / 1000); +const expiresIn = timestamp - now; + +console.log("\nTimestamp validation:"); +console.log("Current time (Unix):", now); +console.log("Credential timestamp:", timestamp); +console.log("Expires in:", expiresIn, "seconds"); + +if (expiresIn < 0) { + console.log("โŒ Credentials have EXPIRED!"); +} else { + console.log("โœ… Credentials are still valid for", Math.floor(expiresIn / 3600), "hours"); +} + +// To verify the credential, we would need the API Key Secret +// The credential should 
be: base64(hmac-sha1(username, apiKeySecret)) +console.log("\nNote: To fully verify the credential, we would need the API Key Secret."); +console.log("The credential should be computed as: base64(hmac-sha1(username, apiKeySecret))"); + +// Test with curl (requires actual network test) +console.log("\nTo test the TURN server directly, you can use a tool like 'turnutils_uclient':"); +console.log(`turnutils_uclient -T -p 3478 -u "${username}" -w "${credential}" turn:global.turn.twilio.com`); \ No newline at end of file diff --git a/twilio/twilio-credential-updater.sh b/twilio/twilio-credential-updater.sh new file mode 100644 index 0000000..43fe575 --- /dev/null +++ b/twilio/twilio-credential-updater.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# Twilio TURN Credential Updater for Cloud Run +# This script is called from cloudrun-wrapper.sh to get fresh credentials on startup + +set -e + +# Check if we're using dynamic credentials mode (from Secret Manager) +if [ "$NEKO_ICESERVERS" = "DYNAMIC" ]; then + echo "[twilio-updater] Dynamic credentials mode - will fetch fresh TURN credentials" +elif [ -n "$NEKO_ICESERVERS" ] && [ "$NEKO_ICESERVERS" != "DYNAMIC" ]; then + # NEKO_ICESERVERS is already set with actual credentials + echo "[twilio-updater] Using pre-configured TURN credentials" + return 0 2>/dev/null || exit 0 +fi + +# Twilio credentials (passed as environment variables) +ACCOUNT_SID="${TWILIO_ACCOUNT_SID}" +AUTH_TOKEN="${TWILIO_AUTH_TOKEN}" + +if [ -z "$ACCOUNT_SID" ] || [ -z "$AUTH_TOKEN" ]; then + echo "[twilio-updater] Warning: Twilio credentials not set, using fallback TURN servers" + # Export fallback servers + export NEKO_ICESERVERS='[{"urls": ["turn:openrelay.metered.ca:80?transport=tcp"], "username": "openrelayproject", "credential": "openrelayproject"}]' + return 0 2>/dev/null || exit 0 +fi + +echo "[twilio-updater] Fetching fresh TURN credentials from Twilio..." 
+ +# Get TURN credentials from Twilio API +response=$(curl -s -X POST \ + "https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Tokens.json" \ + -u "${ACCOUNT_SID}:${AUTH_TOKEN}" 2>/dev/null) + +# Check if request was successful +if echo "$response" | grep -q "ice_servers"; then + # Format credentials for neko + ice_servers=$(echo "$response" | python3 -c " +import json +import sys +try: + data = json.load(sys.stdin) + servers = [] + for server in data.get('ice_servers', []): + if server.get('url', '').startswith('turn'): + url = server['url'] + if 'transport=' not in url: + url += '?transport=tcp' + servers.append({ + 'urls': [url], + 'username': server.get('username', ''), + 'credential': server.get('credential', '') + }) + print(json.dumps(servers)) +except: + print('[]') +" 2>/dev/null) + + if [ -n "$ice_servers" ] && [ "$ice_servers" != "[]" ]; then + echo "[twilio-updater] Successfully retrieved TURN credentials" + export NEKO_ICESERVERS="$ice_servers" + else + echo "[twilio-updater] Failed to parse TURN credentials, using fallback" + export NEKO_ICESERVERS='[{"urls": ["turn:openrelay.metered.ca:80?transport=tcp"], "username": "openrelayproject", "credential": "openrelayproject"}]' + fi +else + echo "[twilio-updater] Failed to get TURN credentials from Twilio, using fallback" + echo "[twilio-updater] Response: ${response:0:100}..." + export NEKO_ICESERVERS='[{"urls": ["turn:openrelay.metered.ca:80?transport=tcp"], "username": "openrelayproject", "credential": "openrelayproject"}]' +fi + +echo "[twilio-updater] NEKO_ICESERVERS set to: ${NEKO_ICESERVERS:0:100}..." 
\ No newline at end of file diff --git a/twilio/twilio-token-service.js b/twilio/twilio-token-service.js new file mode 100644 index 0000000..a845e78 --- /dev/null +++ b/twilio/twilio-token-service.js @@ -0,0 +1,168 @@ +#!/usr/bin/env node + +/** + * Twilio Network Traversal Service Token Generator + * Generates short-lived TURN credentials using Twilio's API + */ + +const https = require('https'); +const express = require('express'); + +// Twilio Account credentials (these are different from API Key) +const ACCOUNT_SID = process.env.TWILIO_ACCOUNT_SID || 'YOUR_ACCOUNT_SID'; +const AUTH_TOKEN = process.env.TWILIO_AUTH_TOKEN || 'YOUR_AUTH_TOKEN'; + +// Optional: API Key credentials (if using API keys instead of master credentials) +const API_KEY_SID = process.env.TWILIO_API_KEY_SID || 'SK5346918f48275d6571be927e84cfd6f8'; +const API_KEY_SECRET = process.env.TWILIO_API_KEY_SECRET || 'OWJDRGxZZnxUlwOVXbupRs9yhQaylXzo'; + +// Cache for tokens +let tokenCache = null; +let tokenExpiry = 0; + +/** + * Get TURN credentials from Twilio Network Traversal Service + */ +async function getTwilioTurnCredentials() { + return new Promise((resolve, reject) => { + // Check cache first + if (tokenCache && Date.now() < tokenExpiry) { + console.log('Returning cached TURN credentials'); + return resolve(tokenCache); + } + + console.log('Fetching new TURN credentials from Twilio...'); + + // Twilio API endpoint for Network Traversal Service + const options = { + hostname: 'api.twilio.com', + port: 443, + path: `/2010-04-01/Accounts/${ACCOUNT_SID}/Tokens.json`, + method: 'POST', + auth: `${ACCOUNT_SID}:${AUTH_TOKEN}`, + headers: { + 'Content-Type': 'application/x-www-form-urlencoded', + 'Content-Length': 0 + } + }; + + const req = https.request(options, (res) => { + let data = ''; + + res.on('data', (chunk) => { + data += chunk; + }); + + res.on('end', () => { + try { + const response = JSON.parse(data); + + if (res.statusCode !== 201 && res.statusCode !== 200) { + console.error('Twilio 
API error:', response); + return reject(new Error(`Twilio API error: ${response.message || 'Unknown error'}`)); + } + + // Parse the ice_servers from response + const iceServers = response.ice_servers || []; + + // Cache for 1 hour (Twilio tokens are typically valid for 24 hours) + tokenCache = iceServers; + tokenExpiry = Date.now() + (60 * 60 * 1000); // 1 hour + + console.log(`Received ${iceServers.length} ICE servers from Twilio`); + resolve(iceServers); + } catch (error) { + reject(new Error(`Failed to parse Twilio response: ${error.message}`)); + } + }); + }); + + req.on('error', (error) => { + reject(new Error(`Twilio API request failed: ${error.message}`)); + }); + + req.end(); + }); +} + +/** + * Format ICE servers for neko + */ +function formatForNeko(twilioIceServers) { + // Twilio returns format: {"url": "...", "username": "...", "credential": "..."} + // Neko expects: {"urls": ["..."], "username": "...", "credential": "..."} + return twilioIceServers.map(server => { + if (server.url) { + // Add TCP transport for TURN servers in Cloud Run + let url = server.url; + if (url.startsWith('turn:') && !url.includes('transport=')) { + url += '?transport=tcp'; + } + + return { + urls: [url], + username: server.username, + credential: server.credential + }; + } + return server; + }).filter(server => { + // Only keep TURN servers for Cloud Run (STUN won't work) + return server.urls && server.urls[0] && server.urls[0].startsWith('turn'); + }); +} + +// Create Express server for health checks and credential endpoint +const app = express(); +const PORT = process.env.PORT || 3000; + +app.get('/health', (req, res) => { + res.json({ status: 'healthy' }); +}); + +app.get('/turn-credentials', async (req, res) => { + try { + const twilioServers = await getTwilioTurnCredentials(); + const nekoServers = formatForNeko(twilioServers); + + res.json({ + iceServers: nekoServers, + ttl: 3600, // 1 hour + expires: new Date(tokenExpiry).toISOString() + }); + } catch (error) { + 
console.error('Error getting TURN credentials:', error); + res.status(500).json({ error: error.message }); + } +}); + +// Standalone mode - get credentials and output for service.yaml +if (require.main === module) { + if (process.argv.includes('--server')) { + // Start HTTP server + app.listen(PORT, () => { + console.log(`Twilio token service listening on port ${PORT}`); + console.log(`Health check: http://localhost:${PORT}/health`); + console.log(`TURN credentials: http://localhost:${PORT}/turn-credentials`); + }); + } else { + // One-time credential generation + getTwilioTurnCredentials() + .then(twilioServers => { + const nekoServers = formatForNeko(twilioServers); + + console.log('\n=== Twilio TURN Credentials ==='); + console.log('For service.yaml, use:'); + console.log('- name: NEKO_ICESERVERS'); + console.log(` value: '${JSON.stringify(nekoServers)}'`); + console.log('\nCredentials expire in ~24 hours'); + console.log('Raw response:', JSON.stringify(twilioServers, null, 2)); + }) + .catch(error => { + console.error('Failed to get credentials:', error); + process.exit(1); + }); + } +} + +module.exports = { getTwilioTurnCredentials, formatForNeko }; \ No newline at end of file diff --git a/twilio/update-twilio-credentials.sh b/twilio/update-twilio-credentials.sh new file mode 100755 index 0000000..a468345 --- /dev/null +++ b/twilio/update-twilio-credentials.sh @@ -0,0 +1,96 @@ +#!/bin/bash + +# Update Cloud Run service with fresh Twilio TURN credentials +# This script should be run periodically (e.g., every hour via cron) +# Run from the root directory: ./twilio/update-twilio-credentials.sh + +set -e + +# Configuration +PROJECT_ID="${PROJECT_ID:-func-241017}" +SERVICE_NAME="kernel-browser" +REGION="us-central1" + +# Twilio credentials (set these as environment variables) +TWILIO_ACCOUNT_SID="${TWILIO_ACCOUNT_SID}" +TWILIO_AUTH_TOKEN="${TWILIO_AUTH_TOKEN}" + +if [ -z "$TWILIO_ACCOUNT_SID" ] || [ -z "$TWILIO_AUTH_TOKEN" ]; then + echo "โŒ Error: 
TWILIO_ACCOUNT_SID and TWILIO_AUTH_TOKEN must be set" + echo " Export them as environment variables:" + echo " export TWILIO_ACCOUNT_SID=your_account_sid" + echo " export TWILIO_AUTH_TOKEN=your_auth_token" + exit 1 +fi + +echo "๐Ÿ”„ Fetching fresh TURN credentials from Twilio..." + +# Get TURN credentials from Twilio API +response=$(curl -s -X POST \ + "https://api.twilio.com/2010-04-01/Accounts/${TWILIO_ACCOUNT_SID}/Tokens.json" \ + -u "${TWILIO_ACCOUNT_SID}:${TWILIO_AUTH_TOKEN}") + +# Check if request was successful +if ! echo "$response" | grep -q "ice_servers"; then + echo "โŒ Failed to get TURN credentials from Twilio" + echo "Response: $response" + exit 1 +fi + +# Format credentials for neko +ice_servers=$(echo "$response" | python3 -c " +import json +import sys +data = json.load(sys.stdin) +servers = [] +for server in data.get('ice_servers', []): + if server.get('url', '').startswith('turn'): + url = server['url'] + if 'transport=' not in url: + url += '?transport=tcp' + servers.append({ + 'urls': [url], + 'username': server.get('username', ''), + 'credential': server.get('credential', '') + }) +print(json.dumps(servers)) +") + +echo "โœ… Received fresh TURN credentials" +echo " ICE Servers: $ice_servers" + +# Update Cloud Run service with new credentials +echo "๐Ÿš€ Updating Cloud Run service..." + +# Create a temporary service.yaml with updated credentials +cat > /tmp/service-update.yaml < { + console.log(`Testing ${useTLS ? 'TLS' : 'TCP'} connection to ${host}:${port}...`); + + const options = { + host: host, + port: port, + rejectUnauthorized: false + }; + + const socket = useTLS ? 
+ tls.connect(options, () => { + console.log(`โœ… TLS connection established to ${host}:${port}`); + socket.end(); + resolve(true); + }) : + net.connect(options, () => { + console.log(`โœ… TCP connection established to ${host}:${port}`); + socket.end(); + resolve(true); + }); + + socket.on('error', (err) => { + console.log(`โŒ Failed to connect: ${err.message}`); + resolve(false); + }); + + socket.setTimeout(5000, () => { + console.log(`โŒ Connection timeout`); + socket.destroy(); + resolve(false); + }); + }); +} + +// Run tests +(async () => { + await testTurnServer('global.turn.twilio.com', 3478, false); + await testTurnServer('global.turn.twilio.com', 5349, true); + + console.log('\n' + '=' .repeat(60)); + console.log('If connections succeed, credentials should work in service.yaml'); + console.log('=' .repeat(60)); +})(); \ No newline at end of file From 432a8630eed4ea02fa6ccb2d37359c748e8f02b4 Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Mon, 8 Sep 2025 17:35:14 -0500 Subject: [PATCH 07/25] Removed obsolete cloudrun script --- start-chromium-cloudrun.sh | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 start-chromium-cloudrun.sh diff --git a/start-chromium-cloudrun.sh b/start-chromium-cloudrun.sh deleted file mode 100644 index 8e00e59..0000000 --- a/start-chromium-cloudrun.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -set -o pipefail -o errexit -o nounset - -# Cloud Run optimized Chromium launcher - no runuser needed since we're already kernel user - -echo "Starting Chromium launcher (Cloud Run mode)" - -# Resolve internal port for the remote debugging interface -INTERNAL_PORT="${INTERNAL_PORT:-9223}" - -# Load additional Chromium flags from env and optional file -CHROMIUM_FLAGS="${CHROMIUM_FLAGS:-}" -if [[ -f /chromium/flags ]]; then - CHROMIUM_FLAGS="$CHROMIUM_FLAGS $(cat /chromium/flags)" -fi -echo "CHROMIUM_FLAGS: $CHROMIUM_FLAGS" - -# Always use display :1 and point DBus to the system bus socket -export 
DISPLAY=":1" -export DBUS_SESSION_BUS_ADDRESS="unix:path=/tmp/dbus/system_bus_socket" -export XDG_CONFIG_HOME=/home/kernel/.config -export XDG_CACHE_HOME=/home/kernel/.cache -export HOME=/home/kernel - -echo "Running chromium as kernel user (Cloud Run mode)" -exec chromium \ - --remote-debugging-port="$INTERNAL_PORT" \ - --user-data-dir=/home/kernel/user-data \ - --password-store=basic \ - --no-first-run \ - ${CHROMIUM_FLAGS:-} \ No newline at end of file From 59989593abdc170336aed9c93620c422d4c06f30 Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Mon, 8 Sep 2025 17:39:59 -0500 Subject: [PATCH 08/25] Fix Makefile --- Makefile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Makefile b/Makefile index 4bc3420..7c9e9eb 100644 --- a/Makefile +++ b/Makefile @@ -49,11 +49,7 @@ restart: ## Restart containers @$(MAKE) --no-print-directory compose-up logs: ## Show container logs -<<<<<<< HEAD - docker-compose logs -f kernel-browser || docker logs -f kernel-browser-local -======= docker-compose logs -f kernel-browser || docker logs -f kernel-browser-extended ->>>>>>> main status: ## Show container status @echo "Docker Compose Status:" From f908215c266e0114922178028ae74be93c473780 Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Mon, 8 Sep 2025 18:12:13 -0500 Subject: [PATCH 09/25] Fix the build --- cloudbuild.yaml | 32 ++++++++++++-------------------- deploy.sh | 2 +- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/cloudbuild.yaml b/cloudbuild.yaml index d178da5..a5b1663 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -41,43 +41,35 @@ steps: - 'push' - 'us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest' - # Step 5: Choose appropriate service.yaml based on secrets availability + # Step 5: Deploy to Cloud Run with appropriate service.yaml - name: 'gcr.io/cloud-builders/gcloud' entrypoint: 'bash' args: - '-c' - | - # Check if Twilio secrets exist + # Check if Twilio secrets exist and choose appropriate service file if gcloud secrets 
describe twilio-account-sid --project=$PROJECT_ID >/dev/null 2>&1 && \ gcloud secrets describe twilio-auth-token --project=$PROJECT_ID >/dev/null 2>&1; then echo "Using service-secrets.yaml with Secret Manager references" - SERVICE_YAML="service-secrets.yaml" + SERVICE_FILE="service-secrets.yaml" else echo "Using standard service.yaml (secrets not configured)" - SERVICE_YAML="service.yaml" + SERVICE_FILE="service.yaml" fi # Update project ID in the chosen service file - sed -i "s/PROJECT_ID/$PROJECT_ID/g" $SERVICE_YAML + cp $$SERVICE_FILE temp-service.yaml + sed -i "s/PROJECT_ID/$PROJECT_ID/g" temp-service.yaml - echo "Deploying with: $SERVICE_YAML" - cat $SERVICE_YAML + echo "Deploying with: $$SERVICE_FILE" + cat temp-service.yaml - # Save the choice for next step - echo $SERVICE_YAML > /workspace/service_choice.txt - - # Step 6: Deploy to Cloud Run - - name: 'gcr.io/cloud-builders/gcloud' - entrypoint: 'bash' - args: - - '-c' - - | - SERVICE_YAML=$(cat /workspace/service_choice.txt) - gcloud run services replace $SERVICE_YAML \ + # Deploy to Cloud Run + gcloud run services replace temp-service.yaml \ --region=us-central1 \ --quiet - # Step 7: Update traffic to latest revision + # Step 6: Update traffic to latest revision - name: 'gcr.io/cloud-builders/gcloud' args: - 'run' @@ -88,7 +80,7 @@ steps: - '--region=us-central1' - '--quiet' - # Step 8: Get the service URL + # Step 7: Get the service URL - name: 'gcr.io/cloud-builders/gcloud' args: - 'run' diff --git a/deploy.sh b/deploy.sh index 50527e1..048bb3f 100755 --- a/deploy.sh +++ b/deploy.sh @@ -40,7 +40,7 @@ load_env_file() { info "Loading configuration from .env file..." # Export variables from .env, ignoring comments and empty lines set -a - source <(grep -v '^#' .env | grep -v '^$') + . 
.env set +a success "Configuration loaded from .env" elif [ -f .env.example ]; then From 6ff623db22cecefc15f2f8027caf1261d000256f Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Mon, 8 Sep 2025 20:28:40 -0500 Subject: [PATCH 10/25] Fixed conlict in the run-local.sh --- run-local.sh | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/run-local.sh b/run-local.sh index d5dae5a..75b8dc0 100755 --- a/run-local.sh +++ b/run-local.sh @@ -50,21 +50,6 @@ echo "" echo "๐Ÿƒ Starting extended container with kernel-images run system..." -<<<<<<< HEAD -# Backup original run-docker.sh to modify port mappings -if [ ! -f run-docker.sh.original ]; then - cp run-docker.sh run-docker.sh.original -fi - -# Create modified run script that adds DevTools port mapping -cat run-docker.sh.original | \ -sed 's/docker run -it/docker run -it -p 8001:8001/' > run-docker.sh.extended - -chmod +x run-docker.sh.extended - -# Run using the modified run script with DevTools port -./run-docker.sh.extended -======= # Execute the kernel-images script setup but override the final docker run command # We'll replicate the essential parts here to avoid the sed hack @@ -120,7 +105,6 @@ fi # Run with our additional DevTools port mapping docker rm -f "$NAME" 2>/dev/null || true docker run -it "${RUN_ARGS[@]}" "$IMAGE" ->>>>>>> main echo "" echo "๐ŸŒ Extended service should be accessible at:" From f7a49cbad8a191250ac3409e2dff89091b7c0f8d Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Mon, 8 Sep 2025 20:31:27 -0500 Subject: [PATCH 11/25] Remove test script --- twilio/verify-twilio.js | 84 ----------------------------------------- 1 file changed, 84 deletions(-) delete mode 100644 twilio/verify-twilio.js diff --git a/twilio/verify-twilio.js b/twilio/verify-twilio.js deleted file mode 100644 index 7d91745..0000000 --- a/twilio/verify-twilio.js +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env node - -const crypto = require('crypto'); - -// Your Twilio API credentials -const API_KEY_SID = 
'SK5346918f48275d6571be927e84cfd6f8'; -const API_KEY_SECRET = 'OWJDRGxZZnxUlwOVXbupRs9yhQaylXzo'; - -// Generate credentials -const ttl = 86400; // 24 hours -const unixTimestamp = Math.floor(Date.now() / 1000) + ttl; -const username = `${unixTimestamp}:${API_KEY_SID}`; -const password = crypto - .createHmac('sha1', API_KEY_SECRET) - .update(username) - .digest('base64'); - -console.log('Testing Twilio TURN credentials locally'); -console.log('=' .repeat(60)); -console.log('Username:', username); -console.log('Password:', password); -console.log('=' .repeat(60)); - -// Test with curl commands -console.log('\nTest commands to verify TURN server access:\n'); - -// Test STUN binding -console.log('1. Test STUN binding (should work without auth):'); -console.log(`curl -X POST "https://global.turn.twilio.com:5349" --http1.1 -k`); - -// Test with turnutils if available -console.log('\n2. Test with turnutils_uclient (if installed):'); -console.log(`turnutils_uclient -T -p 3478 -u "${username}" -w "${password}" turn:global.turn.twilio.com`); - -// Test with Node.js TURN client -console.log('\n3. Testing connection with Node.js...\n'); - -const net = require('net'); -const tls = require('tls'); - -function testTurnServer(host, port, useTLS = false) { - return new Promise((resolve, reject) => { - console.log(`Testing ${useTLS ? 'TLS' : 'TCP'} connection to ${host}:${port}...`); - - const options = { - host: host, - port: port, - rejectUnauthorized: false - }; - - const socket = useTLS ? 
- tls.connect(options, () => { - console.log(`โœ… TLS connection established to ${host}:${port}`); - socket.end(); - resolve(true); - }) : - net.connect(options, () => { - console.log(`โœ… TCP connection established to ${host}:${port}`); - socket.end(); - resolve(true); - }); - - socket.on('error', (err) => { - console.log(`โŒ Failed to connect: ${err.message}`); - resolve(false); - }); - - socket.setTimeout(5000, () => { - console.log(`โŒ Connection timeout`); - socket.destroy(); - resolve(false); - }); - }); -} - -// Run tests -(async () => { - await testTurnServer('global.turn.twilio.com', 3478, false); - await testTurnServer('global.turn.twilio.com', 5349, true); - - console.log('\n' + '=' .repeat(60)); - console.log('If connections succeed, credentials should work in service.yaml'); - console.log('=' .repeat(60)); -})(); \ No newline at end of file From 457f8826a2220318c7d4697fdecff1d14ac7a04b Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Mon, 8 Sep 2025 20:32:31 -0500 Subject: [PATCH 12/25] Remove test file --- twilio/test-twilio-node.js | 62 -------------------------------------- 1 file changed, 62 deletions(-) delete mode 100644 twilio/test-twilio-node.js diff --git a/twilio/test-twilio-node.js b/twilio/test-twilio-node.js deleted file mode 100644 index da67c9a..0000000 --- a/twilio/test-twilio-node.js +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env node - -const https = require('https'); - -const ACCOUNT_SID = 'SK5346918f48275d6571be927e84cfd6f8'; -const AUTH_TOKEN = 'OWJDRGxZZnxUlwOVXbupRs9yhQaylXzo'; - -function getTwilioTurnCredentials() { - return new Promise((resolve, reject) => { - console.log('Fetching TURN credentials from Twilio...'); - - const auth = Buffer.from(`${ACCOUNT_SID}:${AUTH_TOKEN}`).toString('base64'); - - const options = { - hostname: 'api.twilio.com', - port: 443, - path: `/2010-04-01/Accounts/${ACCOUNT_SID}/Tokens.json`, - method: 'POST', - headers: { - 'Authorization': `Basic ${auth}`, - 'Content-Type': 
'application/x-www-form-urlencoded', - 'Content-Length': 0 - } - }; - - const req = https.request(options, (res) => { - let data = ''; - res.on('data', (chunk) => data += chunk); - res.on('end', () => { - try { - const response = JSON.parse(data); - if (res.statusCode === 201 || res.statusCode === 200) { - console.log('โœ… Success!'); - resolve(response.ice_servers || []); - } else { - reject(new Error(`API error: ${response.message}`)); - } - } catch (error) { - reject(error); - } - }); - }); - - req.on('error', reject); - req.end(); - }); -} - -getTwilioTurnCredentials() - .then(servers => { - const nekoServers = servers - .filter(s => s.url && s.url.startsWith('turn')) - .map(s => ({ - urls: [s.url], - username: s.username, - credential: s.credential - })); - - console.log('\nFormatted for NEKO_ICESERVERS:'); - console.log(JSON.stringify(nekoServers)); - }) - .catch(console.error); \ No newline at end of file From fbad4843499158913957d7f4e7cc5d3951d87a22 Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Mon, 8 Sep 2025 20:33:47 -0500 Subject: [PATCH 13/25] Remove test files --- twilio/generate-twilio-credential.js | 42 ------- twilio/test-twilio-api.sh | 54 --------- twilio/test-twilio-turn.js | 45 ------- twilio/twilio-token-service.js | 168 --------------------------- 4 files changed, 309 deletions(-) delete mode 100644 twilio/generate-twilio-credential.js delete mode 100755 twilio/test-twilio-api.sh delete mode 100644 twilio/test-twilio-turn.js delete mode 100644 twilio/twilio-token-service.js diff --git a/twilio/generate-twilio-credential.js b/twilio/generate-twilio-credential.js deleted file mode 100644 index fcc384d..0000000 --- a/twilio/generate-twilio-credential.js +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env node - -const crypto = require('crypto'); - -// Twilio API credentials -const API_KEY_SID = 'SK5346918f48275d6571be927e84cfd6f8'; -const API_KEY_SECRET = process.env.TWILIO_API_KEY_SECRET || 'YOUR_API_KEY_SECRET_HERE'; - -// Time to live (in 
seconds) - 24 hours -const ttl = 86400; - -// Calculate expiration timestamp -const unixTimestamp = Math.floor(Date.now() / 1000) + ttl; - -// Create username (timestamp:apiKeySid) -const username = `${unixTimestamp}:${API_KEY_SID}`; - -// Generate password using HMAC-SHA1 -const password = crypto - .createHmac('sha1', API_KEY_SECRET) - .update(username) - .digest('base64'); - -console.log('Twilio TURN Credential Generator'); -console.log('=' .repeat(60)); -console.log('\nConfiguration:'); -console.log(`API Key SID: ${API_KEY_SID}`); -console.log(`API Key Secret: ${API_KEY_SECRET === 'YOUR_API_KEY_SECRET_HERE' ? '[NOT SET - Please provide]' : '[HIDDEN]'}`); -console.log(`TTL: ${ttl} seconds (${ttl/3600} hours)`); -console.log('\nGenerated Credentials:'); -console.log(`Username: ${username}`); -console.log(`Password: ${password}`); -console.log(`\nExpires at: ${new Date(unixTimestamp * 1000).toISOString()}`); - -console.log('\nFor service.yaml, use:'); -console.log(`- name: NEKO_ICESERVERS`); -console.log(` value: '[{"urls": ["turn:global.turn.twilio.com:3478?transport=tcp", "turns:global.turn.twilio.com:5349?transport=tcp"], "username": "${username}", "credential": "${password}"}]'`); - -if (API_KEY_SECRET === 'YOUR_API_KEY_SECRET_HERE') { - console.log('\nโš ๏ธ WARNING: You need to set the actual API Key Secret!'); - console.log('Run with: TWILIO_API_KEY_SECRET=your_actual_secret node generate-twilio-credential.js'); -} \ No newline at end of file diff --git a/twilio/test-twilio-api.sh b/twilio/test-twilio-api.sh deleted file mode 100755 index a07a03d..0000000 --- a/twilio/test-twilio-api.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -# Test Twilio Network Traversal Service API -# This generates temporary TURN credentials - -ACCOUNT_SID="${TWILIO_ACCOUNT_SID:-YOUR_ACCOUNT_SID}" -AUTH_TOKEN="${TWILIO_AUTH_TOKEN:-YOUR_AUTH_TOKEN}" - -echo "Testing Twilio Network Traversal Service API" -echo "============================================" -echo "Account SID: 
$ACCOUNT_SID" -echo "" - -# Make API call to get TURN credentials -echo "Requesting TURN credentials from Twilio..." -echo "" - -response=$(curl -s -X POST \ - "https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Tokens.json" \ - -u "${ACCOUNT_SID}:${AUTH_TOKEN}") - -# Check if request was successful -if echo "$response" | grep -q "ice_servers"; then - echo "โœ… Success! Received TURN credentials:" - echo "$response" | python3 -m json.tool - - # Extract and format for service.yaml - echo "" - echo "Formatted for NEKO_ICESERVERS:" - echo "$response" | python3 -c " -import json -import sys -data = json.load(sys.stdin) -servers = [] -for server in data.get('ice_servers', []): - if server.get('url', '').startswith('turn'): - url = server['url'] - if 'transport=' not in url: - url += '?transport=tcp' - servers.append({ - 'urls': [url], - 'username': server.get('username', ''), - 'credential': server.get('credential', '') - }) -print(json.dumps(servers)) -" -else - echo "โŒ Failed to get TURN credentials" - echo "Response: $response" - echo "" - echo "Make sure you have:" - echo "1. Valid Twilio Account SID and Auth Token" - echo "2. 
Network Traversal Service enabled on your Twilio account" -fi \ No newline at end of file diff --git a/twilio/test-twilio-turn.js b/twilio/test-twilio-turn.js deleted file mode 100644 index 58782d0..0000000 --- a/twilio/test-twilio-turn.js +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env node - -// Test Twilio TURN server credentials -const crypto = require('crypto'); - -// Parse the credentials from service.yaml -const username = "1757273052:SK5346918f48275d6571be927e84cfd6f8"; -const credential = "12HiXDndTPnUQZorm6TDDHd9Co8="; - -console.log("Testing Twilio TURN credentials:"); -console.log("Username:", username); -console.log("Credential:", credential); - -// Extract timestamp from username -const parts = username.split(':'); -const timestamp = parseInt(parts[0]); -const apiKeySid = parts[1]; - -console.log("\nParsed values:"); -console.log("Timestamp:", timestamp); -console.log("API Key SID:", apiKeySid); - -// Check if timestamp is valid (not expired) -const now = Math.floor(Date.now() / 1000); -const expiresIn = timestamp - now; - -console.log("\nTimestamp validation:"); -console.log("Current time (Unix):", now); -console.log("Credential timestamp:", timestamp); -console.log("Expires in:", expiresIn, "seconds"); - -if (expiresIn < 0) { - console.log("โŒ Credentials have EXPIRED!"); -} else { - console.log("โœ… Credentials are still valid for", Math.floor(expiresIn / 3600), "hours"); -} - -// To verify the credential, we would need the API Key Secret -// The credential should be: base64(hmac-sha1(username, apiKeySecret)) -console.log("\nNote: To fully verify the credential, we would need the API Key Secret."); -console.log("The credential should be computed as: base64(hmac-sha1(username, apiKeySecret))"); - -// Test with curl (requires actual network test) -console.log("\nTo test the TURN server directly, you can use a tool like 'turnutils_uclient':"); -console.log(`turnutils_uclient -T -p 3478 -u "${username}" -w "${credential}" turn:global.turn.twilio.com`); 
\ No newline at end of file diff --git a/twilio/twilio-token-service.js b/twilio/twilio-token-service.js deleted file mode 100644 index a845e78..0000000 --- a/twilio/twilio-token-service.js +++ /dev/null @@ -1,168 +0,0 @@ -#!/usr/bin/env node - -/** - * Twilio Network Traversal Service Token Generator - * Generates short-lived TURN credentials using Twilio's API - */ - -const https = require('https'); -const express = require('express'); - -// Twilio Account credentials (these are different from API Key) -const ACCOUNT_SID = process.env.TWILIO_ACCOUNT_SID || 'YOUR_ACCOUNT_SID'; -const AUTH_TOKEN = process.env.TWILIO_AUTH_TOKEN || 'YOUR_AUTH_TOKEN'; - -// Optional: API Key credentials (if using API keys instead of master credentials) -const API_KEY_SID = process.env.TWILIO_API_KEY_SID || 'SK5346918f48275d6571be927e84cfd6f8'; -const API_KEY_SECRET = process.env.TWILIO_API_KEY_SECRET || 'OWJDRGxZZnxUlwOVXbupRs9yhQaylXzo'; - -// Cache for tokens -let tokenCache = null; -let tokenExpiry = 0; - -/** - * Get TURN credentials from Twilio Network Traversal Service - */ -async function getTwilioTurnCredentials() { - return new Promise((resolve, reject) => { - // Check cache first - if (tokenCache && Date.now() < tokenExpiry) { - console.log('Returning cached TURN credentials'); - return resolve(tokenCache); - } - - console.log('Fetching new TURN credentials from Twilio...'); - - // Twilio API endpoint for Network Traversal Service - const options = { - hostname: 'api.twilio.com', - port: 443, - path: `/2010-04-01/Accounts/${ACCOUNT_SID}/Tokens.json`, - method: 'POST', - auth: `${ACCOUNT_SID}:${AUTH_TOKEN}`, - headers: { - 'Content-Type': 'application/x-www-form-urlencoded', - 'Content-Length': 0 - } - }; - - const req = https.request(options, (res) => { - let data = ''; - - res.on('data', (chunk) => { - data += chunk; - }); - - res.on('end', () => { - try { - const response = JSON.parse(data); - - if (res.statusCode !== 201 && res.statusCode !== 200) { - 
console.error('Twilio API error:', response); - return reject(new Error(`Twilio API error: ${response.message || 'Unknown error'}`)); - } - - // Parse the ice_servers from response - const iceServers = response.ice_servers || []; - - // Cache for 1 hour (Twilio tokens are typically valid for 24 hours) - tokenCache = iceServers; - tokenExpiry = Date.now() + (60 * 60 * 1000); // 1 hour - - console.log(`Received ${iceServers.length} ICE servers from Twilio`); - resolve(iceServers); - } catch (error) { - reject(new Error(`Failed to parse Twilio response: ${error.message}`)); - } - }); - }); - - req.on('error', (error) => { - reject(new Error(`Twilio API request failed: ${error.message}`)); - }); - - req.end(); - }); -} - -/** - * Format ICE servers for neko - */ -function formatForNeko(twilioIceServers) { - // Twilio returns format: {"url": "...", "username": "...", "credential": "..."} - // Neko expects: {"urls": ["..."], "username": "...", "credential": "..."} - return twilioIceServers.map(server => { - if (server.url) { - // Add TCP transport for TURN servers in Cloud Run - let url = server.url; - if (url.startsWith('turn:') && !url.includes('transport=')) { - url += '?transport=tcp'; - } - - return { - urls: [url], - username: server.username, - credential: server.credential - }; - } - return server; - }).filter(server => { - // Only keep TURN servers for Cloud Run (STUN won't work) - return server.urls && server.urls[0] && server.urls[0].startsWith('turn'); - }); -} - -// Create Express server for health checks and credential endpoint -const app = express(); -const PORT = process.env.PORT || 3000; - -app.get('/health', (req, res) => { - res.json({ status: 'healthy' }); -}); - -app.get('/turn-credentials', async (req, res) => { - try { - const twilioServers = await getTwilioTurnCredentials(); - const nekoServers = formatForNeko(twilioServers); - - res.json({ - iceServers: nekoServers, - ttl: 3600, // 1 hour - expires: new Date(tokenExpiry).toISOString() - }); - } 
catch (error) { - console.error('Error getting TURN credentials:', error); - res.status(500).json({ error: error.message }); - } -}); - -// Standalone mode - get credentials and output for service.yaml -if (require.main === module) { - if (process.argv.includes('--server')) { - // Start HTTP server - app.listen(PORT, () => { - console.log(`Twilio token service listening on port ${PORT}`); - console.log(`Health check: http://localhost:${PORT}/health`); - console.log(`TURN credentials: http://localhost:${PORT}/turn-credentials`); - }); - } else { - // One-time credential generation - getTwilioTurnCredentials() - .then(twilioServers => { - const nekoServers = formatForNeko(twilioServers); - - console.log('\n=== Twilio TURN Credentials ==='); - console.log('For service.yaml, use:'); - console.log('- name: NEKO_ICESERVERS'); - console.log(` value: '${JSON.stringify(nekoServers)}'`); - console.log('\nCredentials expire in ~24 hours'); - console.log('Raw response:', JSON.stringify(twilioServers, null, 2)); - }) - .catch(error => { - console.error('Failed to get credentials:', error); - process.exit(1); - }); - } -} - -module.exports = { getTwilioTurnCredentials, formatForNeko }; \ No newline at end of file From 202b50bb16ba2e3adcb5308a85232aa7a5b211ec Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Mon, 8 Sep 2025 20:44:58 -0500 Subject: [PATCH 14/25] Remove hardcoded credentials --- cloudbuild.yaml | 4 ++-- service.yaml | 11 ++++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/cloudbuild.yaml b/cloudbuild.yaml index a5b1663..e2b34a9 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -58,10 +58,10 @@ steps: fi # Update project ID in the chosen service file - cp $$SERVICE_FILE temp-service.yaml + cp $SERVICE_FILE temp-service.yaml sed -i "s/PROJECT_ID/$PROJECT_ID/g" temp-service.yaml - echo "Deploying with: $$SERVICE_FILE" + echo "Deploying with: $SERVICE_FILE" cat temp-service.yaml # Deploy to Cloud Run diff --git a/service.yaml b/service.yaml 
index f39c167..8013a07 100644 --- a/service.yaml +++ b/service.yaml @@ -59,13 +59,10 @@ spec: - name: WIDTH value: "1024" # Twilio API Key credentials for dynamic TURN generation - - name: TWILIO_ACCOUNT_SID - value: "SK5346918f48275d6571be927e84cfd6f8" - - name: TWILIO_AUTH_TOKEN - value: "OWJDRGxZZnxUlwOVXbupRs9yhQaylXzo" - # Fresh Twilio TURN credentials (manually generated for now) + # No Twilio credentials in fallback mode - credentials handled by twilio-credential-updater.sh + # Fallback TURN servers (used when Twilio credentials are not configured) - name: NEKO_ICESERVERS - value: '[{"urls":["turn:global.turn.twilio.com:3478?transport=tcp"],"username":"b88cfa1369190aa9cbc8bfaca683c457476b5d7062aa0a7b184c87db3ade0ff5","credential":"m3oPEt94gQQP+g2yd4R32MuZtCCdw6Rmmuvkp6/Dkd0="},{"urls":["turn:global.turn.twilio.com:443?transport=tcp"],"username":"b88cfa1369190aa9cbc8bfaca683c457476b5d7062aa0a7b184c87db3ade0ff5","credential":"m3oPEt94gQQP+g2yd4R32MuZtCCdw6Rmmuvkp6/Dkd0="}]' + value: '[{"urls":["stun:stun.l.google.com:19302"]},{"urls":["turn:openrelay.metered.ca:80"],"username":"openrelayproject","credential":"openrelayproject"}]' # Disable TCP multiplexing (nginx handles port 8080) - name: NEKO_WEBRTC_TCPMUX value: "0" @@ -83,7 +80,7 @@ spec: value: "/tmp/recordings" # Force new revision - name: DEPLOYMENT_VERSION - value: "v11-fresh-twilio-credentials" + value: "v12-fallback-configuration" traffic: - percent: 100 latestRevision: true \ No newline at end of file From 40b559056bb5c22b25de245d28471cbdecf8e71d Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Mon, 8 Sep 2025 20:48:49 -0500 Subject: [PATCH 15/25] Fixed cloubuild.yaml --- cloudbuild.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cloudbuild.yaml b/cloudbuild.yaml index e2b34a9..4d2bfe2 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -51,17 +51,16 @@ steps: if gcloud secrets describe twilio-account-sid --project=$PROJECT_ID >/dev/null 2>&1 && \ gcloud secrets 
describe twilio-auth-token --project=$PROJECT_ID >/dev/null 2>&1; then echo "Using service-secrets.yaml with Secret Manager references" - SERVICE_FILE="service-secrets.yaml" + cp service-secrets.yaml temp-service.yaml else echo "Using standard service.yaml (secrets not configured)" - SERVICE_FILE="service.yaml" + cp service.yaml temp-service.yaml fi # Update project ID in the chosen service file - cp $SERVICE_FILE temp-service.yaml sed -i "s/PROJECT_ID/$PROJECT_ID/g" temp-service.yaml - echo "Deploying with: $SERVICE_FILE" + echo "Deploying service configuration:" cat temp-service.yaml # Deploy to Cloud Run From 5f9e3a0cab789ddf2dc831db156e19e03de44a3c Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Mon, 8 Sep 2025 21:17:38 -0500 Subject: [PATCH 16/25] Switching to TCP instead UDP for WebRTC --- service-secrets.yaml | 10 ++++++--- service.yaml | 14 ++++++++----- twilio/twilio-credential-updater.sh | 32 ++++++++++++++++++++--------- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/service-secrets.yaml b/service-secrets.yaml index 34b4b91..f3c983e 100644 --- a/service-secrets.yaml +++ b/service-secrets.yaml @@ -73,9 +73,13 @@ spec: # The twilio-credential-updater.sh script will use these at startup - name: NEKO_ICESERVERS value: 'DYNAMIC' # Placeholder - will be replaced by twilio-credential-updater.sh - # Disable TCP multiplexing (nginx handles port 8080) + # TCP-only WebRTC configuration for Cloud Run - name: NEKO_WEBRTC_TCPMUX - value: "0" + value: "0" # Disable TCP multiplexing (nginx handles port 8080) + - name: NEKO_WEBRTC_ICE_LITE + value: "true" # Use ICE-Lite mode for server + - name: NEKO_WEBRTC_ICE_SERVERS_ONLY_TURN + value: "true" # Only use TURN servers, no STUN # Optional: Google Cloud Storage bucket for recordings - name: GCS_BUCKET value: "kernel-browser-recordings" @@ -90,7 +94,7 @@ spec: value: "/tmp/recordings" # Force new revision - name: DEPLOYMENT_VERSION - value: "v12-secret-manager" + value: "v13-tcp-only-webrtc" traffic: - 
percent: 100 latestRevision: true \ No newline at end of file diff --git a/service.yaml b/service.yaml index 8013a07..e13d6e9 100644 --- a/service.yaml +++ b/service.yaml @@ -60,12 +60,16 @@ spec: value: "1024" # Twilio API Key credentials for dynamic TURN generation # No Twilio credentials in fallback mode - credentials handled by twilio-credential-updater.sh - # Fallback TURN servers (used when Twilio credentials are not configured) + # TCP-only TURN servers for Cloud Run (no STUN/UDP) - name: NEKO_ICESERVERS - value: '[{"urls":["stun:stun.l.google.com:19302"]},{"urls":["turn:openrelay.metered.ca:80"],"username":"openrelayproject","credential":"openrelayproject"}]' - # Disable TCP multiplexing (nginx handles port 8080) + value: '[{"urls":["turn:openrelay.metered.ca:80?transport=tcp"],"username":"openrelayproject","credential":"openrelayproject"},{"urls":["turns:openrelay.metered.ca:443?transport=tcp"],"username":"openrelayproject","credential":"openrelayproject"}]' + # TCP-only WebRTC configuration for Cloud Run - name: NEKO_WEBRTC_TCPMUX - value: "0" + value: "0" # Disable TCP multiplexing (nginx handles port 8080) + - name: NEKO_WEBRTC_ICE_LITE + value: "true" # Use ICE-Lite mode for server + - name: NEKO_WEBRTC_ICE_SERVERS_ONLY_TURN + value: "true" # Only use TURN servers, no STUN # Optional: Google Cloud Storage bucket for recordings - name: GCS_BUCKET value: "kernel-browser-recordings" @@ -80,7 +84,7 @@ spec: value: "/tmp/recordings" # Force new revision - name: DEPLOYMENT_VERSION - value: "v12-fallback-configuration" + value: "v13-tcp-only-fallback" traffic: - percent: 100 latestRevision: true \ No newline at end of file diff --git a/twilio/twilio-credential-updater.sh b/twilio/twilio-credential-updater.sh index 43fe575..80255ea 100644 --- a/twilio/twilio-credential-updater.sh +++ b/twilio/twilio-credential-updater.sh @@ -19,9 +19,9 @@ ACCOUNT_SID="${TWILIO_ACCOUNT_SID}" AUTH_TOKEN="${TWILIO_AUTH_TOKEN}" if [ -z "$ACCOUNT_SID" ] || [ -z "$AUTH_TOKEN" ]; 
then - echo "[twilio-updater] Warning: Twilio credentials not set, using fallback TURN servers" - # Export fallback servers - export NEKO_ICESERVERS='[{"urls": ["turn:openrelay.metered.ca:80?transport=tcp"], "username": "openrelayproject", "credential": "openrelayproject"}]' + echo "[twilio-updater] Warning: Twilio credentials not set, using TCP-only fallback TURN servers" + # Export TCP-only fallback servers (no STUN for Cloud Run) + export NEKO_ICESERVERS='[{"urls": ["turn:openrelay.metered.ca:80?transport=tcp"], "username": "openrelayproject", "credential": "openrelayproject"}, {"urls": ["turns:openrelay.metered.ca:443?transport=tcp"], "username": "openrelayproject", "credential": "openrelayproject"}]' return 0 2>/dev/null || exit 0 fi @@ -34,7 +34,7 @@ response=$(curl -s -X POST \ # Check if request was successful if echo "$response" | grep -q "ice_servers"; then - # Format credentials for neko + # Format credentials for neko (TCP-only for Cloud Run) ice_servers=$(echo "$response" | python3 -c " import json import sys @@ -44,13 +44,25 @@ try: for server in data.get('ice_servers', []): if server.get('url', '').startswith('turn'): url = server['url'] - if 'transport=' not in url: - url += '?transport=tcp' + # Force TCP transport for Cloud Run compatibility + if '?transport=' in url: + url = url.split('?transport=')[0] + url += '?transport=tcp' servers.append({ 'urls': [url], 'username': server.get('username', ''), 'credential': server.get('credential', '') }) + + # Also add TLS version for redundancy + tls_url = url.replace('turn:', 'turns:').replace(':3478', ':5349') + servers.append({ + 'urls': [tls_url], + 'username': server.get('username', ''), + 'credential': server.get('credential', '') + }) + + # Remove STUN servers - only use TURN for Cloud Run print(json.dumps(servers)) except: print('[]') @@ -60,13 +72,13 @@ except: echo "[twilio-updater] Successfully retrieved TURN credentials" export NEKO_ICESERVERS="$ice_servers" else - echo "[twilio-updater] Failed 
to parse TURN credentials, using fallback" - export NEKO_ICESERVERS='[{"urls": ["turn:openrelay.metered.ca:80?transport=tcp"], "username": "openrelayproject", "credential": "openrelayproject"}]' + echo "[twilio-updater] Failed to parse TURN credentials, using TCP-only fallback" + export NEKO_ICESERVERS='[{"urls": ["turn:openrelay.metered.ca:80?transport=tcp"], "username": "openrelayproject", "credential": "openrelayproject"}, {"urls": ["turns:openrelay.metered.ca:443?transport=tcp"], "username": "openrelayproject", "credential": "openrelayproject"}]' fi else - echo "[twilio-updater] Failed to get TURN credentials from Twilio, using fallback" + echo "[twilio-updater] Failed to get TURN credentials from Twilio, using TCP-only fallback" echo "[twilio-updater] Response: ${response:0:100}..." - export NEKO_ICESERVERS='[{"urls": ["turn:openrelay.metered.ca:80?transport=tcp"], "username": "openrelayproject", "credential": "openrelayproject"}]' + export NEKO_ICESERVERS='[{"urls": ["turn:openrelay.metered.ca:80?transport=tcp"], "username": "openrelayproject", "credential": "openrelayproject"}, {"urls": ["turns:openrelay.metered.ca:443?transport=tcp"], "username": "openrelayproject", "credential": "openrelayproject"}]' fi echo "[twilio-updater] NEKO_ICESERVERS set to: ${NEKO_ICESERVERS:0:100}..." 
\ No newline at end of file From 0a3ab42b650cca8a3ce3004c93d0dba3952d59c2 Mon Sep 17 00:00:00 2001 From: Tyson Thomas Date: Tue, 9 Sep 2025 08:51:06 -0700 Subject: [PATCH 17/25] fix the deployment issue --- service-secrets.yaml | 2 +- service.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/service-secrets.yaml b/service-secrets.yaml index f3c983e..d265334 100644 --- a/service-secrets.yaml +++ b/service-secrets.yaml @@ -28,7 +28,7 @@ spec: containers: - name: kernel-browser # This will be set during deployment - image: us-docker.pkg.dev/func-241017/gcr.io/kernel-browser:latest + image: us-docker.pkg.dev/PROJECT_ID/gcr.io/kernel-browser:latest ports: - name: http1 containerPort: 8080 diff --git a/service.yaml b/service.yaml index e13d6e9..c78c45b 100644 --- a/service.yaml +++ b/service.yaml @@ -28,7 +28,7 @@ spec: containers: - name: kernel-browser # This will be set during deployment - image: us-docker.pkg.dev/func-241017/gcr.io/kernel-browser:latest + image: us-docker.pkg.dev/PROJECT_ID/gcr.io/kernel-browser:latest ports: - name: http1 containerPort: 8080 From ca565e40825e755f91d39217c7b9414bc6a8a668 Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Tue, 9 Sep 2025 13:54:17 -0500 Subject: [PATCH 18/25] Fixed hardcoded project name; removed google as a start page --- deploy.sh | 2 +- supervisor/services-cloudrun/chromium.conf | 2 +- twilio/update-twilio-credentials.sh | 26 ++++++++++++++++++---- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/deploy.sh b/deploy.sh index 048bb3f..2996275 100755 --- a/deploy.sh +++ b/deploy.sh @@ -312,7 +312,7 @@ deploy_local() { # Update service file with project ID and image cp "$service_file" "${service_file}.tmp" sed -i.bak "s/PROJECT_ID/$PROJECT_ID/g" "${service_file}.tmp" - sed -i.bak "s|us-docker.pkg.dev/func-241017/gcr.io/kernel-browser:latest|$image_name|g" "${service_file}.tmp" + sed -i.bak "s|us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest|$image_name|g" 
"${service_file}.tmp" gcloud run services replace "${service_file}.tmp" \ --region="$REGION" \ diff --git a/supervisor/services-cloudrun/chromium.conf b/supervisor/services-cloudrun/chromium.conf index 6fdc548..d8413f5 100644 --- a/supervisor/services-cloudrun/chromium.conf +++ b/supervisor/services-cloudrun/chromium.conf @@ -1,5 +1,5 @@ [program:chromium] -command=/bin/bash -lc 'sleep 3 && DISPLAY=":1" DBUS_SESSION_BUS_ADDRESS="unix:path=/tmp/dbus/session_bus_socket" chromium --remote-debugging-port=9223 --remote-allow-origins=* --user-data-dir=/home/kernel/user-data --password-store=basic --no-first-run --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor --custom-devtools-frontend=http://localhost:8001/ https://www.google.com' +command=/bin/bash -lc 'sleep 3 && DISPLAY=":1" DBUS_SESSION_BUS_ADDRESS="unix:path=/tmp/dbus/session_bus_socket" chromium --remote-debugging-port=9223 --remote-allow-origins=* --user-data-dir=/home/kernel/user-data --password-store=basic --no-first-run --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor --custom-devtools-frontend=http://localhost:8001/' autostart=true autorestart=true startsecs=8 diff --git a/twilio/update-twilio-credentials.sh b/twilio/update-twilio-credentials.sh index a468345..3ce4364 100755 --- a/twilio/update-twilio-credentials.sh +++ b/twilio/update-twilio-credentials.sh @@ -6,18 +6,36 @@ set -e +# Load environment variables from .env file if it exists +if [ -f ../.env ]; then + set -a + . ../.env + set +a +elif [ -f .env ]; then + set -a + . 
.env + set +a +fi + # Configuration -PROJECT_ID="${PROJECT_ID:-func-241017}" +PROJECT_ID="${PROJECT_ID}" SERVICE_NAME="kernel-browser" -REGION="us-central1" +REGION="${REGION:-us-central1}" -# Twilio credentials (set these as environment variables) +# Twilio credentials (from environment or .env file) TWILIO_ACCOUNT_SID="${TWILIO_ACCOUNT_SID}" TWILIO_AUTH_TOKEN="${TWILIO_AUTH_TOKEN}" +if [ -z "$PROJECT_ID" ]; then + echo "โŒ Error: PROJECT_ID must be set" + echo " Set it in your .env file or export as environment variable:" + echo " export PROJECT_ID=your-project-id" + exit 1 +fi + if [ -z "$TWILIO_ACCOUNT_SID" ] || [ -z "$TWILIO_AUTH_TOKEN" ]; then echo "โŒ Error: TWILIO_ACCOUNT_SID and TWILIO_AUTH_TOKEN must be set" - echo " Export them as environment variables:" + echo " Set them in your .env file or export as environment variables:" echo " export TWILIO_ACCOUNT_SID=your_account_sid" echo " export TWILIO_AUTH_TOKEN=your_auth_token" exit 1 From 7c6253de22448b6aa5d63b4b338332c4dd535a8a Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Tue, 9 Sep 2025 13:54:52 -0500 Subject: [PATCH 19/25] Modified example env file --- .env.example | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index 268baa2..2aee412 100644 --- a/.env.example +++ b/.env.example @@ -8,9 +8,9 @@ TWILIO_ACCOUNT_SID=SK...your_api_key_sid_here TWILIO_AUTH_TOKEN=your_api_key_secret_here -# Optional: Google Cloud Configuration +# Google Cloud Configuration # If not provided, will use current gcloud config -# PROJECT_ID=your-project-id +PROJECT_ID=your-gcp-project-id # REGION=us-central1 # Optional: Service Configuration From c541c5336aec2194721168708976c8a8a43dd2f1 Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Wed, 10 Sep 2025 10:59:44 -0500 Subject: [PATCH 20/25] A follow up changes to improve CloudRun deployment --- Readme.md | 23 +++++++++++- cloudbuild.yaml | 52 ++++++++++++++++++-------- cloudrun-wrapper.sh | 11 ++++++ deploy.sh | 23 
++++++++++++ nginx.conf | 1 + service-secrets.yaml | 4 +- supervisor/services-cloudrun/neko.conf | 2 +- twilio/update-twilio-credentials.sh | 1 + 8 files changed, 96 insertions(+), 21 deletions(-) diff --git a/Readme.md b/Readme.md index 20b38e9..1594f56 100644 --- a/Readme.md +++ b/Readme.md @@ -227,16 +227,35 @@ Example: 1 hour session โ‰ˆ $0.50-1.00 The `cloudbuild.yaml` provides: 1. Submodule initialization -2. Docker image build +2. Docker image build with caching 3. Container Registry push 4. Cloud Run deployment 5. Traffic routing -Trigger builds via: +### Build Commands + ```bash +# Normal build (with cache) - recommended for development gcloud builds submit --config cloudbuild.yaml + +# Force rebuild without cache - use when dependencies change +gcloud builds submit --config cloudbuild.yaml --substitutions=_NO_CACHE=true + +# Automated deployment with Twilio TURN server setup +./deploy.sh ``` +### Cache Control + +The build system uses Docker layer caching by default to reduce build times and costs: +- **With cache**: ~5-10 minutes, lower cost +- **Without cache**: ~30+ minutes, higher cost (~$3-5 per build) + +Use `_NO_CACHE=true` only when: +- Dependencies have changed significantly +- Base images need updating +- Debugging build issues + ## ๐Ÿ“š Additional Resources - [kernel-images Documentation](https://github.com/onkernel/kernel-images) diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 4d2bfe2..7499986 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -1,4 +1,8 @@ # Cloud Build configuration for kernel-browser +# Usage: gcloud builds submit --substitutions=_NO_CACHE=true (to disable cache) +substitutions: + _NO_CACHE: 'false' + steps: # Step 1: Verify kernel-images directory exists - name: 'gcr.io/cloud-builders/docker' @@ -17,22 +21,36 @@ steps: args: - '-c' - | - echo "Attempting to pull previous image for caching..." 
- docker pull us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest || echo "No previous image found for caching" + if [ "${_NO_CACHE}" = "true" ]; then + echo "โš ๏ธ Cache disabled by _NO_CACHE=true flag" + else + echo "Attempting to pull previous image for caching..." + docker pull us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest || echo "No previous image found for caching" + fi # Step 3: Build the Docker image with caching (using cloudrun Dockerfile) - name: 'gcr.io/cloud-builders/docker' + entrypoint: 'bash' args: - - 'build' - - '--file' - - 'Dockerfile.cloudrun' - - '--cache-from' - - 'us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest' - - '--build-arg' - - 'CACHE_BUST=$BUILD_ID' - - '--tag' - - 'us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest' - - '.' + - '-c' + - | + if [ "${_NO_CACHE}" = "true" ]; then + echo "๐Ÿ”จ Building without cache..." + docker build \ + --file Dockerfile.cloudrun \ + --no-cache \ + --build-arg CACHE_BUST=$BUILD_ID \ + --tag us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest \ + . + else + echo "๐Ÿš€ Building with cache from previous image..." + docker build \ + --file Dockerfile.cloudrun \ + --cache-from us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest \ + --build-arg CACHE_BUST=$BUILD_ID \ + --tag us-docker.pkg.dev/$PROJECT_ID/gcr.io/kernel-browser:latest \ + . + fi timeout: '3600s' # Allow 1 hour for build (it's a large image) # Step 4: Push the image to Artifact Registry @@ -48,12 +66,14 @@ steps: - '-c' - | # Check if Twilio secrets exist and choose appropriate service file - if gcloud secrets describe twilio-account-sid --project=$PROJECT_ID >/dev/null 2>&1 && \ - gcloud secrets describe twilio-auth-token --project=$PROJECT_ID >/dev/null 2>&1; then - echo "Using service-secrets.yaml with Secret Manager references" + echo "Checking for Twilio secrets..." 
+ if gcloud secrets describe twilio-account-sid --project=$PROJECT_ID && \ + gcloud secrets describe twilio-auth-token --project=$PROJECT_ID; then + echo "โœ… Twilio secrets found! Using service-secrets.yaml with Secret Manager references" cp service-secrets.yaml temp-service.yaml else - echo "Using standard service.yaml (secrets not configured)" + echo "โš ๏ธ Twilio secrets NOT found. Using standard service.yaml (secrets not configured)" + echo "To use Twilio TURN servers, run: ./deploy.sh to set up secrets" cp service.yaml temp-service.yaml fi diff --git a/cloudrun-wrapper.sh b/cloudrun-wrapper.sh index c49c995..9c4529e 100644 --- a/cloudrun-wrapper.sh +++ b/cloudrun-wrapper.sh @@ -13,6 +13,12 @@ export HEIGHT=768 export WIDTH=1024 export NEKO_BIND=:8081 +# WebRTC Cloud Run configuration - force relay-only mode +export NEKO_WEBRTC_ICE_LITE=true +export NEKO_WEBRTC_ICE_POLICY=relay +export NEKO_WEBRTC_MDNS=false +export NEKO_WEBRTC_ICE_INTERFACES="" + # Get fresh Twilio TURN credentials if available if [ -f /twilio-credential-updater.sh ]; then echo "[cloudrun-wrapper] Getting fresh Twilio TURN credentials..." 
@@ -51,6 +57,9 @@ http { include /etc/nginx/mime.types; default_type application/octet-stream; + # Configure log files to use /tmp for non-root execution + access_log /tmp/cloudrun-nginx-access.log; + # Create temp directories for nginx (non-root execution) client_body_temp_path /tmp/nginx_client_temp; proxy_temp_path /tmp/nginx_proxy_temp; @@ -120,6 +129,7 @@ http { # Chrome DevTools Protocol HTTP endpoints location /json { proxy_pass http://127.0.0.1:9223/json; + proxy_http_version 1.1; proxy_set_header Host \$host; proxy_set_header X-Real-IP \$remote_addr; proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; @@ -129,6 +139,7 @@ http { # Chrome DevTools Protocol HTTP endpoints (with trailing slash) location /json/ { proxy_pass http://127.0.0.1:9223/json/; + proxy_http_version 1.1; proxy_set_header Host \$host; proxy_set_header X-Real-IP \$remote_addr; proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; diff --git a/deploy.sh b/deploy.sh index 2996275..55d344b 100755 --- a/deploy.sh +++ b/deploy.sh @@ -149,6 +149,29 @@ setup_secrets() { --project="$PROJECT_ID" \ --quiet + # Grant Cloud Build service account permission to view secrets (needed for cloudbuild.yaml) + local project_number=$(gcloud projects describe "$PROJECT_ID" --format="value(projectNumber)") + local cb_sa_email="${project_number}@cloudbuild.gserviceaccount.com" + + info "Granting Secret Manager viewer access to Cloud Build service account..." 
+ gcloud projects add-iam-policy-binding "$PROJECT_ID" \ + --member="serviceAccount:$cb_sa_email" \ + --role="roles/secretmanager.viewer" \ + --project="$PROJECT_ID" \ + --quiet + + gcloud secrets add-iam-policy-binding twilio-account-sid \ + --member="serviceAccount:$cb_sa_email" \ + --role="roles/secretmanager.secretAccessor" \ + --project="$PROJECT_ID" \ + --quiet + + gcloud secrets add-iam-policy-binding twilio-auth-token \ + --member="serviceAccount:$cb_sa_email" \ + --role="roles/secretmanager.secretAccessor" \ + --project="$PROJECT_ID" \ + --quiet + # Set flag to use secrets-enabled service.yaml export USE_SECRETS=true diff --git a/nginx.conf b/nginx.conf index 6d0137b..23faff6 100644 --- a/nginx.conf +++ b/nginx.conf @@ -76,6 +76,7 @@ http { # Chrome DevTools Protocol HTTP endpoints location /json { proxy_pass http://127.0.0.1:9223; + proxy_http_version 1.1; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; diff --git a/service-secrets.yaml b/service-secrets.yaml index d265334..19f6f94 100644 --- a/service-secrets.yaml +++ b/service-secrets.yaml @@ -36,8 +36,8 @@ spec: limits: # 2 CPU cores (within quota limits) cpu: "2" - # 4GiB memory (within quota limits) - memory: "4Gi" + # 2GiB memory (reduced to prevent DevTools crashes) + memory: "2Gi" requests: cpu: "1" memory: "2Gi" diff --git a/supervisor/services-cloudrun/neko.conf b/supervisor/services-cloudrun/neko.conf index 969b62d..7a78140 100644 --- a/supervisor/services-cloudrun/neko.conf +++ b/supervisor/services-cloudrun/neko.conf @@ -7,4 +7,4 @@ priority=15 stdout_logfile=/var/log/supervisord/neko/neko.log stdout_logfile_maxbytes=50MB redirect_stderr=true -environment=HOME="/home/kernel",USER="kernel",DISPLAY=":1",NEKO_WEBRTC_ICESERVERS_FRONTEND="",NEKO_WEBRTC_ICESERVERS_BACKEND="" \ No newline at end of file +environment=HOME="/home/kernel",USER="kernel",DISPLAY=":1" \ No newline at end of file diff --git 
a/twilio/update-twilio-credentials.sh b/twilio/update-twilio-credentials.sh index 3ce4364..44cb585 100755 --- a/twilio/update-twilio-credentials.sh +++ b/twilio/update-twilio-credentials.sh @@ -91,6 +91,7 @@ spec: spec: containers: - name: kernel-browser + image: us-docker.pkg.dev/${PROJECT_ID}/gcr.io/kernel-browser:latest env: - name: NEKO_ICESERVERS value: '${ice_servers}' From aa674a3ac29b92d705b3ad6aacbf886f82347265 Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Sun, 5 Oct 2025 13:56:18 -0500 Subject: [PATCH 21/25] Add eval-server integration with Node.js and nginx routes - Install Node.js 22.x in final Docker image - Add eval-server build stage and startup script - Configure supervisor to auto-start eval-server - Add nginx proxy routes for eval-server API endpoints - Update service configs with eval-server environment variables --- Dockerfile.cloudrun | 35 +- browser-operator-core | 1 + cloudrun-wrapper.sh | 44 + eval-server-start.js | 45 + eval-server/.env.example | 16 + eval-server/.gitignore | 3 + eval-server/README.md | 239 +++ eval-server/nodejs/.env.example | 45 + eval-server/nodejs/CLAUDE.md | 190 +++ eval-server/nodejs/README.md | 546 +++++++ .../1233ae25-9f9e-4f77-924d-865f7d615cef.yaml | 12 + eval-server/nodejs/docs/CLIENT_SETUP.md | 445 ++++++ eval-server/nodejs/docs/PROTOCOL.md | 310 ++++ .../nodejs/docs/TRIGGERING_EVALUATIONS.md | 306 ++++ eval-server/nodejs/docs/YAML_SCHEMA.md | 315 ++++ .../nodejs/evals/action-agent/a11y-001.yaml | 46 + .../evals/action-agent/accordion-001.yaml | 46 + .../action-agent/action-agent-a11y-001.yaml | 46 + .../action-agent-accordion-001.yaml | 46 + .../action-agent-autocomplete-001.yaml | 46 + .../action-agent-checkbox-001.yaml | 46 + .../action-agent-checkbox-002.yaml | 47 + .../action-agent/action-agent-click-001.yaml | 47 + .../action-agent-context-001.yaml | 46 + .../action-agent-datepicker-001.yaml | 46 + .../action-agent-daterange-001.yaml | 46 + .../action-agent-dropdown-001.yaml | 46 + 
.../action-agent-dynamic-001.yaml | 46 + .../action-agent-ecommerce-001.yaml | 46 + .../action-agent/action-agent-error-001.yaml | 47 + .../action-agent/action-agent-filter-001.yaml | 46 + .../action-agent/action-agent-form-001.yaml | 46 + .../action-agent/action-agent-hover-001.yaml | 46 + .../action-agent-keyboard-001.yaml | 46 + .../action-agent/action-agent-login-001.yaml | 47 + .../action-agent/action-agent-modal-001.yaml | 46 + .../action-agent-multiselect-001.yaml | 46 + .../action-agent-multistep-001.yaml | 47 + .../action-agent/action-agent-nav-001.yaml | 46 + .../action-agent/action-agent-radio-001.yaml | 47 + .../action-agent/action-agent-slider-001.yaml | 46 + .../action-agent-tableselect-001.yaml | 46 + .../action-agent-tablesort-001.yaml | 46 + .../action-agent/action-agent-tabs-001.yaml | 46 + .../action-agent-timepicker-001.yaml | 46 + .../action-agent/action-agent-upload-001.yaml | 46 + .../action-agent/action-agent-video-001.yaml | 47 + .../action-agent/action-agent-video-002.yaml | 47 + .../evals/action-agent/autocomplete-001.yaml | 46 + .../evals/action-agent/checkbox-001.yaml | 46 + .../evals/action-agent/checkbox-002.yaml | 47 + .../nodejs/evals/action-agent/click-001.yaml | 47 + .../evals/action-agent/context-001.yaml | 46 + .../evals/action-agent/datepicker-001.yaml | 46 + .../evals/action-agent/daterange-001.yaml | 46 + .../evals/action-agent/dropdown-001.yaml | 46 + .../evals/action-agent/dynamic-001.yaml | 46 + .../evals/action-agent/ecommerce-001.yaml | 46 + .../nodejs/evals/action-agent/error-001.yaml | 47 + .../nodejs/evals/action-agent/filter-001.yaml | 46 + .../nodejs/evals/action-agent/form-001.yaml | 46 + .../nodejs/evals/action-agent/hover-001.yaml | 46 + .../evals/action-agent/keyboard-001.yaml | 46 + .../nodejs/evals/action-agent/login-001.yaml | 47 + .../nodejs/evals/action-agent/modal-001.yaml | 46 + .../evals/action-agent/multiselect-001.yaml | 46 + .../evals/action-agent/multistep-001.yaml | 47 + 
.../nodejs/evals/action-agent/nav-001.yaml | 46 + .../nodejs/evals/action-agent/radio-001.yaml | 47 + .../nodejs/evals/action-agent/slider-001.yaml | 46 + .../evals/action-agent/tableselect-001.yaml | 46 + .../evals/action-agent/tablesort-001.yaml | 46 + .../nodejs/evals/action-agent/tabs-001.yaml | 46 + .../evals/action-agent/timepicker-001.yaml | 46 + .../nodejs/evals/action-agent/upload-001.yaml | 46 + .../nodejs/evals/action-agent/video-001.yaml | 47 + .../nodejs/evals/action-agent/video-002.yaml | 47 + eval-server/nodejs/evals/config.yaml | 11 + .../end-to-end/b-vitamins-research-001.yaml | 35 + .../end-to-end/investment-research-001.yaml | 35 + .../end-to-end/product-comparison-001.yaml | 40 + .../end-to-end/recipe-nutrition-001.yaml | 40 + .../evals/end-to-end/travel-planning-001.yaml | 40 + .../evals/research-agent/basic-001.yaml | 39 + .../evals/research-agent/business-001.yaml | 39 + .../evals/research-agent/comparison-001.yaml | 39 + .../evals/research-agent/current-001.yaml | 40 + .../nodejs/evals/research-agent/edge-001.yaml | 39 + .../research-agent-basic-001.yaml | 39 + .../research-agent-business-001.yaml | 39 + .../research-agent-comparison-001.yaml | 39 + .../research-agent-current-001.yaml | 40 + .../research-agent-edge-001.yaml | 39 + .../research-agent-technical-001.yaml | 39 + .../research-agent-tools-001.yaml | 40 + .../evals/research-agent/technical-001.yaml | 39 + .../evals/research-agent/tools-001.yaml | 40 + .../schema-extractor/amazon-product-001.yaml | 78 + .../evals/schema-extractor/bbc-news-001.yaml | 69 + .../schema-extractor/bing-search-001.yaml | 70 + .../github-repo-001-streamlined.yaml | 66 + .../schema-extractor/github-repo-001.yaml | 66 + .../schema-extractor/google-flights-001.yaml | 106 ++ .../schema-extractor/google-search-001.yaml | 76 + .../evals/schema-extractor/homedepot-001.yaml | 92 ++ .../evals/schema-extractor/macys-001.yaml | 106 ++ .../wikipedia-search-001.yaml | 77 + .../dynamic-content-verification-001.yaml | 45 
+ .../screenshot-error-handling-001.yaml | 42 + .../screenshot-fullpage-001.yaml | 43 + .../screenshot-viewport-001.yaml | 42 + .../visual-comparison-001.yaml | 45 + .../amazon-product-001.yaml | 78 + .../bbc-news-001.yaml | 69 + .../bing-search-001.yaml | 70 + .../github-repo-001.yaml | 66 + .../google-flights-001.yaml | 106 ++ .../google-search-001.yaml | 76 + .../homedepot-001.yaml | 92 ++ .../macys-001.yaml | 106 ++ .../wikipedia-001.yaml | 76 + .../wikipedia-search-001.yaml | 77 + .../evals/web-task-agent/booking-001.yaml | 45 + .../evals/web-task-agent/ecommerce-001.yaml | 53 + .../evals/web-task-agent/error-001.yaml | 45 + .../evals/web-task-agent/extract-001.yaml | 60 + .../evals/web-task-agent/finance-001.yaml | 68 + .../evals/web-task-agent/flight-001.yaml | 45 + .../nodejs/evals/web-task-agent/food-001.yaml | 68 + .../evals/web-task-agent/iframe-001.yaml | 83 ++ .../nodejs/evals/web-task-agent/jobs-001.yaml | 68 + .../evals/web-task-agent/learning-001.yaml | 69 + .../nodejs/evals/web-task-agent/nav-001.yaml | 46 + .../nodejs/evals/web-task-agent/news-001.yaml | 64 + .../evals/web-task-agent/realestate-001.yaml | 70 + .../evals/web-task-agent/scroll-001.yaml | 61 + .../evals/web-task-agent/scroll-002.yaml | 65 + .../evals/web-task-agent/scroll-003.yaml | 61 + .../evals/web-task-agent/scroll-004.yaml | 61 + .../evals/web-task-agent/scroll-005.yaml | 73 + .../evals/web-task-agent/search-001.yaml | 41 + .../evals/web-task-agent/social-001.yaml | 60 + .../web-task-agent-booking-001.yaml | 45 + .../web-task-agent-ecommerce-001.yaml | 53 + .../web-task-agent-error-001.yaml | 45 + .../web-task-agent-extract-001.yaml | 60 + .../web-task-agent-finance-001.yaml | 68 + .../web-task-agent-flight-001.yaml | 45 + .../web-task-agent-food-001.yaml | 68 + .../web-task-agent-iframe-001.yaml | 83 ++ .../web-task-agent-jobs-001.yaml | 68 + .../web-task-agent-learning-001.yaml | 69 + .../web-task-agent-nav-001.yaml | 46 + .../web-task-agent-news-001.yaml | 64 + 
.../web-task-agent-realestate-001.yaml | 70 + .../web-task-agent-scroll-001.yaml | 61 + .../web-task-agent-scroll-002.yaml | 65 + .../web-task-agent-scroll-003.yaml | 61 + .../web-task-agent-scroll-004.yaml | 61 + .../web-task-agent-scroll-005.yaml | 73 + .../web-task-agent-search-001.yaml | 41 + .../web-task-agent-social-001.yaml | 60 + .../1233ae25-9f9e-4f77-924d-865f7d615cef.yaml | 12 + eval-server/nodejs/examples/library-usage.js | 250 ++++ eval-server/nodejs/examples/logs/.gitignore | 3 + eval-server/nodejs/examples/multiple-evals.js | 167 +++ .../nodejs/examples/with-http-wrapper.js | 45 + eval-server/nodejs/logs/.gitignore | 2 + eval-server/nodejs/package-lock.json | 832 +++++++++++ eval-server/nodejs/package.json | 43 + eval-server/nodejs/schemas/client.schema.json | 299 ++++ eval-server/nodejs/src/api-server.js | 492 +++++++ eval-server/nodejs/src/cli/CLI.js | 518 +++++++ eval-server/nodejs/src/cli/index.js | 23 + eval-server/nodejs/src/client-manager.js | 576 ++++++++ eval-server/nodejs/src/config.js | 78 + eval-server/nodejs/src/evaluator.js | 117 ++ eval-server/nodejs/src/lib/EvalServer.js | 923 ++++++++++++ .../nodejs/src/lib/EvaluationLoader.js | 448 ++++++ eval-server/nodejs/src/lib/EvaluationStack.js | 85 ++ eval-server/nodejs/src/lib/HTTPWrapper.js | 93 ++ eval-server/nodejs/src/lib/judges/Judge.js | 80 + eval-server/nodejs/src/lib/judges/LLMJudge.js | 344 +++++ eval-server/nodejs/src/logger.js | 103 ++ eval-server/nodejs/src/rpc-client.js | 122 ++ .../nodejs/templates/default-client.yaml | 56 + eval-server/python/README.md | 368 +++++ eval-server/python/UV_COMMANDS.md | 188 +++ eval-server/python/evals/README.md | 195 +++ .../python/evals/browsecomp_dataset.py | 252 ++++ .../python/evals/browsecomp_eval_server.py | 836 +++++++++++ eval-server/python/evals/browsecomp_scorer.py | 328 +++++ .../evals/run_browsecomp_eval_server.sh | 12 + eval-server/python/examples/__init__.py | 10 + eval-server/python/examples/basic_server.py | 100 ++ 
eval-server/python/examples/logs/.gitignore | 2 + .../python/examples/programmatic_evals.py | 428 ++++++ eval-server/python/examples/with_stack.py | 201 +++ eval-server/python/logs/.gitignore | 2 + eval-server/python/pyproject.toml | 84 ++ eval-server/python/quick_test.py | 38 + eval-server/python/requirements.txt | 10 + eval-server/python/run.py | 100 ++ eval-server/python/scripts.py | 68 + .../python/src/bo_eval_server/__init__.py | 29 + .../src/bo_eval_server/client_manager.py | 401 +++++ .../python/src/bo_eval_server/config.py | 75 + .../python/src/bo_eval_server/eval_server.py | 292 ++++ .../src/bo_eval_server/evaluation_stack.py | 102 ++ .../python/src/bo_eval_server/logger.py | 180 +++ .../python/src/bo_eval_server/rpc_client.py | 229 +++ eval-server/python/test_client.py | 190 +++ eval-server/python/uv.lock | 1306 +++++++++++++++++ service-secrets.yaml | 11 +- service.yaml | 11 +- supervisor/services-cloudrun/eval-server.conf | 12 + test-eval-server.sh | 30 + 217 files changed, 22217 insertions(+), 6 deletions(-) create mode 160000 browser-operator-core create mode 100644 eval-server-start.js create mode 100644 eval-server/.env.example create mode 100644 eval-server/.gitignore create mode 100644 eval-server/README.md create mode 100644 eval-server/nodejs/.env.example create mode 100644 eval-server/nodejs/CLAUDE.md create mode 100644 eval-server/nodejs/README.md create mode 100644 eval-server/nodejs/clients/1233ae25-9f9e-4f77-924d-865f7d615cef.yaml create mode 100644 eval-server/nodejs/docs/CLIENT_SETUP.md create mode 100644 eval-server/nodejs/docs/PROTOCOL.md create mode 100644 eval-server/nodejs/docs/TRIGGERING_EVALUATIONS.md create mode 100644 eval-server/nodejs/docs/YAML_SCHEMA.md create mode 100644 eval-server/nodejs/evals/action-agent/a11y-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/accordion-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-a11y-001.yaml create mode 100644 
eval-server/nodejs/evals/action-agent/action-agent-accordion-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-autocomplete-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-checkbox-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-checkbox-002.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-click-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-context-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-datepicker-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-daterange-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-dropdown-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-dynamic-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-ecommerce-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-error-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-filter-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-form-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-hover-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-keyboard-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-login-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-modal-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-multiselect-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-multistep-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-nav-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-radio-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-slider-001.yaml create mode 
100644 eval-server/nodejs/evals/action-agent/action-agent-tableselect-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-tablesort-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-tabs-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-timepicker-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-upload-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-video-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/action-agent-video-002.yaml create mode 100644 eval-server/nodejs/evals/action-agent/autocomplete-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/checkbox-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/checkbox-002.yaml create mode 100644 eval-server/nodejs/evals/action-agent/click-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/context-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/datepicker-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/daterange-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/dropdown-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/dynamic-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/ecommerce-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/error-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/filter-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/form-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/hover-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/keyboard-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/login-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/modal-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/multiselect-001.yaml create mode 100644 
eval-server/nodejs/evals/action-agent/multistep-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/nav-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/radio-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/slider-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/tableselect-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/tablesort-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/tabs-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/timepicker-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/upload-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/video-001.yaml create mode 100644 eval-server/nodejs/evals/action-agent/video-002.yaml create mode 100644 eval-server/nodejs/evals/config.yaml create mode 100644 eval-server/nodejs/evals/end-to-end/b-vitamins-research-001.yaml create mode 100644 eval-server/nodejs/evals/end-to-end/investment-research-001.yaml create mode 100644 eval-server/nodejs/evals/end-to-end/product-comparison-001.yaml create mode 100644 eval-server/nodejs/evals/end-to-end/recipe-nutrition-001.yaml create mode 100644 eval-server/nodejs/evals/end-to-end/travel-planning-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/basic-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/business-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/comparison-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/current-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/edge-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/research-agent-basic-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/research-agent-business-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/research-agent-comparison-001.yaml create mode 100644 
eval-server/nodejs/evals/research-agent/research-agent-current-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/research-agent-edge-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/research-agent-technical-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/research-agent-tools-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/technical-001.yaml create mode 100644 eval-server/nodejs/evals/research-agent/tools-001.yaml create mode 100644 eval-server/nodejs/evals/schema-extractor/amazon-product-001.yaml create mode 100644 eval-server/nodejs/evals/schema-extractor/bbc-news-001.yaml create mode 100644 eval-server/nodejs/evals/schema-extractor/bing-search-001.yaml create mode 100644 eval-server/nodejs/evals/schema-extractor/github-repo-001-streamlined.yaml create mode 100644 eval-server/nodejs/evals/schema-extractor/github-repo-001.yaml create mode 100644 eval-server/nodejs/evals/schema-extractor/google-flights-001.yaml create mode 100644 eval-server/nodejs/evals/schema-extractor/google-search-001.yaml create mode 100644 eval-server/nodejs/evals/schema-extractor/homedepot-001.yaml create mode 100644 eval-server/nodejs/evals/schema-extractor/macys-001.yaml create mode 100644 eval-server/nodejs/evals/schema-extractor/wikipedia-search-001.yaml create mode 100644 eval-server/nodejs/evals/screenshot-verification/dynamic-content-verification-001.yaml create mode 100644 eval-server/nodejs/evals/screenshot-verification/screenshot-error-handling-001.yaml create mode 100644 eval-server/nodejs/evals/screenshot-verification/screenshot-fullpage-001.yaml create mode 100644 eval-server/nodejs/evals/screenshot-verification/screenshot-viewport-001.yaml create mode 100644 eval-server/nodejs/evals/screenshot-verification/visual-comparison-001.yaml create mode 100644 eval-server/nodejs/evals/streamlined-schema-extractor/amazon-product-001.yaml create mode 100644 
eval-server/nodejs/evals/streamlined-schema-extractor/bbc-news-001.yaml create mode 100644 eval-server/nodejs/evals/streamlined-schema-extractor/bing-search-001.yaml create mode 100644 eval-server/nodejs/evals/streamlined-schema-extractor/github-repo-001.yaml create mode 100644 eval-server/nodejs/evals/streamlined-schema-extractor/google-flights-001.yaml create mode 100644 eval-server/nodejs/evals/streamlined-schema-extractor/google-search-001.yaml create mode 100644 eval-server/nodejs/evals/streamlined-schema-extractor/homedepot-001.yaml create mode 100644 eval-server/nodejs/evals/streamlined-schema-extractor/macys-001.yaml create mode 100644 eval-server/nodejs/evals/streamlined-schema-extractor/wikipedia-001.yaml create mode 100644 eval-server/nodejs/evals/streamlined-schema-extractor/wikipedia-search-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/booking-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/ecommerce-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/error-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/extract-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/finance-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/flight-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/food-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/iframe-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/jobs-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/learning-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/nav-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/news-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/realestate-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/scroll-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/scroll-002.yaml create mode 100644 
eval-server/nodejs/evals/web-task-agent/scroll-003.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/scroll-004.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/scroll-005.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/search-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/social-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-booking-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-ecommerce-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-error-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-extract-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-finance-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-flight-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-food-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-iframe-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-jobs-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-learning-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-nav-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-news-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-realestate-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-002.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-003.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-004.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-005.yaml create mode 100644 
eval-server/nodejs/evals/web-task-agent/web-task-agent-search-001.yaml create mode 100644 eval-server/nodejs/evals/web-task-agent/web-task-agent-social-001.yaml create mode 100644 eval-server/nodejs/examples/clients/1233ae25-9f9e-4f77-924d-865f7d615cef.yaml create mode 100644 eval-server/nodejs/examples/library-usage.js create mode 100644 eval-server/nodejs/examples/logs/.gitignore create mode 100755 eval-server/nodejs/examples/multiple-evals.js create mode 100644 eval-server/nodejs/examples/with-http-wrapper.js create mode 100644 eval-server/nodejs/logs/.gitignore create mode 100644 eval-server/nodejs/package-lock.json create mode 100644 eval-server/nodejs/package.json create mode 100644 eval-server/nodejs/schemas/client.schema.json create mode 100644 eval-server/nodejs/src/api-server.js create mode 100644 eval-server/nodejs/src/cli/CLI.js create mode 100644 eval-server/nodejs/src/cli/index.js create mode 100644 eval-server/nodejs/src/client-manager.js create mode 100644 eval-server/nodejs/src/config.js create mode 100644 eval-server/nodejs/src/evaluator.js create mode 100644 eval-server/nodejs/src/lib/EvalServer.js create mode 100644 eval-server/nodejs/src/lib/EvaluationLoader.js create mode 100644 eval-server/nodejs/src/lib/EvaluationStack.js create mode 100644 eval-server/nodejs/src/lib/HTTPWrapper.js create mode 100644 eval-server/nodejs/src/lib/judges/Judge.js create mode 100644 eval-server/nodejs/src/lib/judges/LLMJudge.js create mode 100644 eval-server/nodejs/src/logger.js create mode 100644 eval-server/nodejs/src/rpc-client.js create mode 100644 eval-server/nodejs/templates/default-client.yaml create mode 100644 eval-server/python/README.md create mode 100644 eval-server/python/UV_COMMANDS.md create mode 100644 eval-server/python/evals/README.md create mode 100644 eval-server/python/evals/browsecomp_dataset.py create mode 100755 eval-server/python/evals/browsecomp_eval_server.py create mode 100644 eval-server/python/evals/browsecomp_scorer.py create mode 
100755 eval-server/python/evals/run_browsecomp_eval_server.sh create mode 100644 eval-server/python/examples/__init__.py create mode 100644 eval-server/python/examples/basic_server.py create mode 100644 eval-server/python/examples/logs/.gitignore create mode 100644 eval-server/python/examples/programmatic_evals.py create mode 100644 eval-server/python/examples/with_stack.py create mode 100644 eval-server/python/logs/.gitignore create mode 100644 eval-server/python/pyproject.toml create mode 100644 eval-server/python/quick_test.py create mode 100644 eval-server/python/requirements.txt create mode 100644 eval-server/python/run.py create mode 100644 eval-server/python/scripts.py create mode 100644 eval-server/python/src/bo_eval_server/__init__.py create mode 100644 eval-server/python/src/bo_eval_server/client_manager.py create mode 100644 eval-server/python/src/bo_eval_server/config.py create mode 100644 eval-server/python/src/bo_eval_server/eval_server.py create mode 100644 eval-server/python/src/bo_eval_server/evaluation_stack.py create mode 100644 eval-server/python/src/bo_eval_server/logger.py create mode 100644 eval-server/python/src/bo_eval_server/rpc_client.py create mode 100644 eval-server/python/test_client.py create mode 100644 eval-server/python/uv.lock create mode 100644 supervisor/services-cloudrun/eval-server.conf create mode 100755 test-eval-server.sh diff --git a/Dockerfile.cloudrun b/Dockerfile.cloudrun index 91ca6c7..e525eed 100644 --- a/Dockerfile.cloudrun +++ b/Dockerfile.cloudrun @@ -52,6 +52,13 @@ RUN git checkout upstream/main # Build Browser Operator version RUN npm run build +# Eval-Server build stage +FROM node:22-bullseye-slim AS eval-server-builder +WORKDIR /eval-server +COPY eval-server/nodejs/package*.json ./ +RUN npm install --production +COPY eval-server/nodejs/ ./ + # Multi-stage build using kernel-images as base FROM docker.io/golang:1.25.0 AS server-builder WORKDIR /workspace/server @@ -143,7 +150,16 @@ RUN apt-get update && \ netcat 
\ nginx \ # PPA req - software-properties-common && \ + software-properties-common \ + # Node.js for eval-server + ca-certificates \ + gnupg && \ + # Install Node.js 22.x for eval-server + mkdir -p /etc/apt/keyrings && \ + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ + echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ + apt-get update && \ + apt-get install -y nodejs && \ # Disable nginx auto-start to prevent conflicts with custom config systemctl disable nginx || true && \ systemctl mask nginx || true && \ @@ -256,6 +272,20 @@ COPY --from=devtools-builder /workspace/devtools/devtools-frontend/out/Default/g # Set permissions for DevTools files RUN chown -R kernel:kernel /usr/share/nginx/devtools +# ============================================================================ +# Eval-Server Integration +# ============================================================================ + +# Copy eval-server from builder +COPY --from=eval-server-builder /eval-server /opt/eval-server + +# Copy custom eval-server startup script INTO eval-server directory +COPY eval-server-start.js /opt/eval-server/start-cloudrun.js +RUN chmod +x /opt/eval-server/start-cloudrun.js + +# Set permissions for eval-server +RUN chown -R kernel:kernel /opt/eval-server + # Cloud Run specific: wrapper scripts (nginx config is inline) # DO NOT copy nginx.conf to avoid auto-start conflicts COPY cloudrun-wrapper.sh /cloudrun-wrapper.sh @@ -268,6 +298,7 @@ COPY supervisor/services-cloudrun/xorg.conf /etc/supervisor/conf.d/services-clou COPY supervisor/services-cloudrun/neko.conf /etc/supervisor/conf.d/services-cloudrun/neko.conf COPY supervisor/services-cloudrun/chromium.conf /etc/supervisor/conf.d/services-cloudrun/chromium.conf COPY supervisor/services-cloudrun/devtools-frontend.conf 
/etc/supervisor/conf.d/services-cloudrun/devtools-frontend.conf +COPY supervisor/services-cloudrun/eval-server.conf /etc/supervisor/conf.d/services-cloudrun/eval-server.conf # Create nginx temp directories for non-root execution RUN mkdir -p /tmp/nginx_client_temp /tmp/nginx_proxy_temp /tmp/nginx_fastcgi_temp \ @@ -279,7 +310,7 @@ RUN mkdir -p /tmp/nginx_client_temp /tmp/nginx_proxy_temp /tmp/nginx_fastcgi_tem # Create supervisor log directories RUN mkdir -p /var/log/supervisord/chromium /var/log/supervisord/neko /var/log/supervisord/xorg \ /var/log/supervisord/dbus /var/log/supervisord/kernel-images-api /var/log/supervisord/mutter \ - /var/log/supervisord/nginx /var/log/supervisord/devtools-frontend && \ + /var/log/supervisord/nginx /var/log/supervisord/devtools-frontend /var/log/supervisord/eval-server && \ chown -R kernel:kernel /var/log/supervisord # Create health check endpoint diff --git a/browser-operator-core b/browser-operator-core new file mode 160000 index 0000000..3aaef1e --- /dev/null +++ b/browser-operator-core @@ -0,0 +1 @@ +Subproject commit 3aaef1ef13cede9dd2b443ee5eddf7102be8cc24 diff --git a/cloudrun-wrapper.sh b/cloudrun-wrapper.sh index 25d6003..34a9428 100644 --- a/cloudrun-wrapper.sh +++ b/cloudrun-wrapper.sh @@ -167,6 +167,50 @@ http { proxy_send_timeout 86400; } + # Eval-Server HTTP API endpoints + location /v1/responses { + proxy_pass http://127.0.0.1:8083/v1/responses; + proxy_http_version 1.1; + proxy_set_header Host \$host; + proxy_set_header X-Real-IP \$remote_addr; + proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto \$scheme; + proxy_read_timeout 1800; + proxy_send_timeout 1800; + } + + # Eval-Server status endpoint + location /eval/status { + proxy_pass http://127.0.0.1:8083/status; + proxy_http_version 1.1; + proxy_set_header Host \$host; + proxy_set_header X-Real-IP \$remote_addr; + proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto 
\$scheme; + } + + # Eval-Server clients endpoint + location /eval/clients { + proxy_pass http://127.0.0.1:8083/clients; + proxy_http_version 1.1; + proxy_set_header Host \$host; + proxy_set_header X-Real-IP \$remote_addr; + proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto \$scheme; + } + + # Eval-Server evaluate endpoint + location /eval/evaluate { + proxy_pass http://127.0.0.1:8083/evaluate; + proxy_http_version 1.1; + proxy_set_header Host \$host; + proxy_set_header X-Real-IP \$remote_addr; + proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto \$scheme; + proxy_read_timeout 1800; + proxy_send_timeout 1800; + } + # Enhanced DevTools Frontend location /devtools/ { proxy_pass http://127.0.0.1:8001/; diff --git a/eval-server-start.js b/eval-server-start.js new file mode 100644 index 0000000..bd4c8b8 --- /dev/null +++ b/eval-server-start.js @@ -0,0 +1,45 @@ +#!/usr/bin/env node + +// Custom eval-server startup script for Cloud Run +// Uses environment variables for port configuration + +import { EvalServer } from './src/lib/EvalServer.js'; +import { HTTPWrapper } from './src/lib/HTTPWrapper.js'; + +const WS_PORT = parseInt(process.env.EVAL_SERVER_WS_PORT || '8082'); +const HTTP_PORT = parseInt(process.env.EVAL_SERVER_HTTP_PORT || '8083'); +const HOST = process.env.EVAL_SERVER_HOST || '127.0.0.1'; + +console.log('๐Ÿ”ง Creating EvalServer...'); +const evalServer = new EvalServer({ + // No authKey - authentication disabled for automated mode + host: HOST, + port: WS_PORT +}); + +console.log('๐Ÿ”ง Creating HTTP wrapper...'); +const httpWrapper = new HTTPWrapper(evalServer, { + port: HTTP_PORT, + host: HOST +}); + +console.log('๐Ÿ”ง Starting EvalServer...'); +await evalServer.start(); +console.log(`โœ… EvalServer started on ws://${HOST}:${WS_PORT}`); + +console.log('๐Ÿ”ง Starting HTTP wrapper...'); +await httpWrapper.start(); +console.log(`โœ… HTTP API started on 
http://${HOST}:${HTTP_PORT}`); + +console.log('โณ Waiting for DevTools client to connect...'); +console.log(` WebSocket URL: ws://${HOST}:${WS_PORT}`); +console.log(` HTTP API URL: http://${HOST}:${HTTP_PORT}`); +console.log(' Auth: Disabled (automated mode)'); + +// Add periodic status check +setInterval(() => { + const evalServerStatus = evalServer.getStatus(); + const httpWrapperStatus = httpWrapper.getStatus(); + console.log(`๐Ÿ“Š EvalServer: ${evalServerStatus.connectedClients} clients, ${evalServerStatus.readyClients} ready`); + console.log(`๐Ÿ“Š HTTP API: ${httpWrapperStatus.isRunning ? 'running' : 'stopped'} on ${httpWrapperStatus.url}`); +}, 30000); diff --git a/eval-server/.env.example b/eval-server/.env.example new file mode 100644 index 0000000..1e8a748 --- /dev/null +++ b/eval-server/.env.example @@ -0,0 +1,16 @@ +# WebSocket Server Configuration +PORT=8080 +HOST=localhost + +# LLM Judge Configuration +OPENAI_API_KEY=your-openai-api-key-here +JUDGE_MODEL=gpt-4 +JUDGE_TEMPERATURE=0.1 + +# Logging Configuration +LOG_LEVEL=info +LOG_DIR=./logs + +# RPC Configuration +RPC_TIMEOUT=30000 +MAX_CONCURRENT_EVALUATIONS=10 \ No newline at end of file diff --git a/eval-server/.gitignore b/eval-server/.gitignore new file mode 100644 index 0000000..78e7d64 --- /dev/null +++ b/eval-server/.gitignore @@ -0,0 +1,3 @@ +.env +node_modules +*.log diff --git a/eval-server/README.md b/eval-server/README.md new file mode 100644 index 0000000..88c852f --- /dev/null +++ b/eval-server/README.md @@ -0,0 +1,239 @@ +# Eval-Server + +A WebSocket-based evaluation server for LLM agents with multiple language implementations. 
+ +## Overview + +This directory contains two functionally equivalent implementations of the bo-eval-server: + +- **NodeJS** (`nodejs/`) - Full-featured implementation with YAML evaluations, HTTP API, CLI, and judge system +- **Python** (`python/`) - Minimal library focused on core WebSocket functionality and programmatic evaluation creation + +Both implementations provide: +- ๐Ÿ”Œ **WebSocket Server** - Real-time agent connections +- ๐Ÿค– **Bidirectional RPC** - JSON-RPC 2.0 for calling agent methods +- ๐Ÿ“š **Programmatic API** - Create and manage evaluations in code +- โšก **Concurrent Support** - Handle multiple agents simultaneously +- ๐Ÿ“Š **Structured Logging** - Comprehensive evaluation tracking + +## Quick Start + +### NodeJS (Full Featured) + +The NodeJS implementation includes YAML evaluation loading, HTTP API wrapper, CLI tools, and LLM-as-a-judge functionality. + +```bash +cd nodejs/ +npm install +npm start +``` + +**Key Features:** +- YAML evaluation file loading +- HTTP API wrapper for REST integration +- Interactive CLI for management +- LLM judge system for response evaluation +- Comprehensive documentation and examples + +See [`nodejs/README.md`](nodejs/README.md) for detailed usage. + +### Python (Lightweight Library) + +The Python implementation focuses on core WebSocket functionality with programmatic evaluation creation. + +```bash +cd python/ +pip install -e . +python examples/basic_server.py +``` + +**Key Features:** +- Minimal dependencies (websockets, loguru) +- Full async/await support +- Evaluation stack for LIFO queuing +- Type hints throughout +- Clean Pythonic API + +See [`python/README.md`](python/README.md) for detailed usage. 
+ +## Architecture Comparison + +| Feature | NodeJS | Python | +|---------|--------|--------| +| **Core WebSocket Server** | โœ… | โœ… | +| **JSON-RPC 2.0** | โœ… | โœ… | +| **Client Management** | โœ… | โœ… | +| **Programmatic Evaluations** | โœ… | โœ… | +| **Evaluation Stack** | โœ… | โœ… | +| **Structured Logging** | โœ… (Winston) | โœ… (Loguru) | +| **YAML Evaluations** | โœ… | โŒ | +| **HTTP API Wrapper** | โœ… | โŒ | +| **CLI Interface** | โœ… | โŒ | +| **LLM Judge System** | โœ… | โŒ | +| **Type System** | TypeScript | Type Hints | + +## Choosing an Implementation + +**Choose NodeJS if you need:** +- YAML-based evaluation definitions +- HTTP REST API endpoints +- Interactive CLI for management +- LLM-as-a-judge evaluation +- Comprehensive feature set + +**Choose Python if you need:** +- Minimal dependencies +- Pure programmatic approach +- Integration with Python ML pipelines +- Modern async/await patterns +- Lightweight deployment + +## Agent Protocol + +Both implementations use the same WebSocket protocol: + +### 1. Connect to WebSocket +```javascript +// NodeJS +const ws = new WebSocket('ws://localhost:8080'); + +// Python +import websockets +ws = await websockets.connect('ws://localhost:8080') +``` + +### 2. Send Registration +```json +{ + "type": "register", + "clientId": "your-client-id", + "secretKey": "your-secret-key", + "capabilities": ["chat", "action"] +} +``` + +### 3. Send Ready Signal +```json +{ + "type": "ready" +} +``` + +### 4. Handle RPC Calls +Both implementations send JSON-RPC 2.0 requests with the `evaluate` method: + +```json +{ + "jsonrpc": "2.0", + "method": "evaluate", + "params": { + "id": "eval_001", + "name": "Test Evaluation", + "tool": "chat", + "input": {"message": "Hello world"} + }, + "id": "unique-call-id" +} +``` + +Agents should respond with: +```json +{ + "jsonrpc": "2.0", + "id": "unique-call-id", + "result": { + "status": "completed", + "output": {"response": "Hello! 
How can I help you?"} + } +} +``` + +## Examples + +### NodeJS Example +```javascript +import { EvalServer } from 'bo-eval-server'; + +const server = new EvalServer({ + authKey: 'secret', + port: 8080 +}); + +server.onConnect(async client => { + const result = await client.evaluate({ + id: "test", + name: "Hello World", + tool: "chat", + input: {message: "Hi there!"} + }); + console.log(result); +}); + +await server.start(); +``` + +### Python Example +```python +import asyncio +from bo_eval_server import EvalServer + +async def main(): + server = EvalServer( + auth_key='secret', + port=8080 + ) + + @server.on_connect + async def handle_client(client): + result = await client.evaluate({ + "id": "test", + "name": "Hello World", + "tool": "chat", + "input": {"message": "Hi there!"} + }) + print(result) + + await server.start() + await server.wait_closed() + +asyncio.run(main()) +``` + +## Development + +Each implementation has its own development setup: + +**NodeJS:** +```bash +cd nodejs/ +npm install +npm run dev # Watch mode +npm test # Run tests +npm run cli # Interactive CLI +``` + +**Python:** +```bash +cd python/ +pip install -e ".[dev]" +pytest # Run tests +black . # Format code +mypy src/ # Type checking +``` + +## Contributing + +When contributing to either implementation: + +1. Maintain API compatibility between versions where possible +2. Update documentation for both implementations when adding shared features +3. Follow the existing code style and patterns +4. Add appropriate tests and examples + +## License + +MIT License - see individual implementation directories for details. + +--- + +Both implementations provide robust, production-ready evaluation servers for LLM agents with different feature sets optimized for different use cases. 
\ No newline at end of file diff --git a/eval-server/nodejs/.env.example b/eval-server/nodejs/.env.example new file mode 100644 index 0000000..a19f3a1 --- /dev/null +++ b/eval-server/nodejs/.env.example @@ -0,0 +1,45 @@ +# Evaluation Server Configuration +# Copy this file to .env and configure your settings + +# Server Configuration +PORT=8080 +HOST=127.0.0.1 + +# LLM Provider API Keys +# Configure one or more providers for evaluation + +# OpenAI Configuration +OPENAI_API_KEY=sk-your-openai-api-key-here + +# LiteLLM Configuration (if using a LiteLLM server) +LITELLM_ENDPOINT=http://localhost:4000 +LITELLM_API_KEY=your-litellm-api-key-here + +# Groq Configuration +GROQ_API_KEY=gsk_your-groq-api-key-here + +# OpenRouter Configuration +OPENROUTER_API_KEY=sk-or-v1-your-openrouter-api-key-here + +# Default LLM Configuration for Evaluations +# These will be used as fallbacks when not specified in evaluation requests +DEFAULT_PROVIDER=openai +DEFAULT_MAIN_MODEL=gpt-4 +DEFAULT_MINI_MODEL=gpt-4-mini +DEFAULT_NANO_MODEL=gpt-3.5-turbo + +# Logging Configuration +LOG_LEVEL=info +LOG_DIR=./logs + +# Client Configuration +CLIENTS_DIR=./clients +EVALS_DIR=./evals + +# RPC Configuration +RPC_TIMEOUT=30000 + +# Security +# Set this to enable authentication for client connections +# Leave empty to disable authentication +AUTH_SECRET_KEY= \ No newline at end of file diff --git a/eval-server/nodejs/CLAUDE.md b/eval-server/nodejs/CLAUDE.md new file mode 100644 index 0000000..ba84f31 --- /dev/null +++ b/eval-server/nodejs/CLAUDE.md @@ -0,0 +1,190 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +bo-eval-server is a WebSocket-based evaluation server for LLM agents that implements an LLM-as-a-judge evaluation system. The server accepts connections from AI agents, sends them evaluation tasks via RPC calls, collects their responses, and uses an LLM to judge the quality of responses. 
+ +## Commands + +### Development +- `npm start` - Start the WebSocket server +- `npm run dev` - Start server with file watching for development +- `npm run cli` - Start interactive CLI for server management and testing +- `npm test` - Run example agent client for testing + +### Installation +- `npm install` - Install dependencies +- Copy `.env.example` to `.env` and configure environment variables + +### Required Environment Variables +- `OPENAI_API_KEY` - OpenAI API key for LLM judge functionality +- `PORT` - WebSocket server port (default: 8080) + +### LLM Provider Configuration (Optional) +- `GROQ_API_KEY` - Groq API key for Groq provider support +- `OPENROUTER_API_KEY` - OpenRouter API key for OpenRouter provider support +- `LITELLM_ENDPOINT` - LiteLLM server endpoint URL +- `LITELLM_API_KEY` - LiteLLM API key for LiteLLM provider support +- `DEFAULT_PROVIDER` - Default LLM provider (openai, groq, openrouter, litellm) +- `DEFAULT_MAIN_MODEL` - Default main model name +- `DEFAULT_MINI_MODEL` - Default mini model name +- `DEFAULT_NANO_MODEL` - Default nano model name + +## Architecture + +### Core Components + +**WebSocket Server** (`src/server.js`) +- Accepts connections from LLM agents +- Manages agent lifecycle (connect, ready, disconnect) +- Orchestrates evaluation sessions +- Handles bidirectional RPC communication + +**RPC Client** (`src/rpc-client.js`) +- Implements JSON-RPC 2.0 protocol for bidirectional communication +- Manages request/response correlation with unique IDs +- Handles timeouts and error conditions +- Calls `Evaluate(request: String) -> String` method on connected agents +- Supports `configure_llm` method for dynamic LLM provider configuration + +**LLM Evaluator** (`src/evaluator.js`) +- Integrates with OpenAI API for LLM-as-a-judge functionality +- Evaluates agent responses on multiple criteria (correctness, completeness, clarity, relevance, helpfulness) +- Returns structured JSON evaluation with scores and reasoning + +**Logger** 
(`src/logger.js`) +- Structured logging using Winston +- Separate log files for different event types +- JSON format for easy parsing and analysis +- Logs all RPC calls, evaluations, and connection events + +### Evaluation Flow + +1. Agent connects to WebSocket server +2. Agent sends "ready" signal +3. Server calls agent's `Evaluate` method with a task +4. Agent processes task and returns response +5. Server sends response to LLM judge for evaluation +6. Results are logged as JSON with scores and detailed feedback + +### Project Structure + +``` +src/ +โ”œโ”€โ”€ server.js # Main WebSocket server and evaluation orchestration +โ”œโ”€โ”€ rpc-client.js # JSON-RPC client for calling agent methods +โ”œโ”€โ”€ evaluator.js # LLM judge integration (OpenAI) +โ”œโ”€โ”€ logger.js # Structured logging and result storage +โ”œโ”€โ”€ config.js # Configuration management +โ””โ”€โ”€ cli.js # Interactive CLI for testing and management + +logs/ # Log files (created automatically) +โ”œโ”€โ”€ combined.log # All log events +โ”œโ”€โ”€ error.log # Error events only +โ””โ”€โ”€ evaluations.jsonl # Evaluation results in JSON Lines format +``` + +### Key Features + +- **Bidirectional RPC**: Server can call methods on connected clients +- **Multi-Provider LLM Support**: Support for OpenAI, Groq, OpenRouter, and LiteLLM providers +- **Dynamic LLM Configuration**: Runtime configuration via `configure_llm` JSON-RPC method +- **Per-Client Configuration**: Each connected client can have different LLM settings +- **LLM-as-a-Judge**: Automated evaluation of agent responses using configurable LLM providers +- **Concurrent Evaluations**: Support for multiple agents and parallel evaluations +- **Structured Logging**: All interactions logged as JSON for analysis +- **Interactive CLI**: Built-in CLI for testing and server management +- **Connection Management**: Robust handling of agent connections and disconnections +- **Timeout Handling**: Configurable timeouts for RPC calls and evaluations + +### Agent 
Protocol + +Agents must implement: +- WebSocket connection to server +- JSON-RPC 2.0 protocol support +- `Evaluate(task: string) -> string` method +- "ready" message to signal availability for evaluations + +### Model Configuration Schema + +The server uses a canonical nested model configuration format that allows per-tier provider and API key settings: + +#### Model Configuration Structure + +```typescript +interface ModelTierConfig { + provider: string; // "openai" | "groq" | "openrouter" | "litellm" + model: string; // Model name (e.g., "gpt-4", "llama-3.1-8b-instant") + api_key: string; // API key for this tier +} + +interface ModelConfig { + main_model: ModelTierConfig; // Primary model for complex tasks + mini_model: ModelTierConfig; // Secondary model for simpler tasks + nano_model: ModelTierConfig; // Tertiary model for basic tasks +} +``` + +#### Example: Evaluation with Model Configuration + +```json +{ + "jsonrpc": "2.0", + "method": "evaluate", + "params": { + "tool": "chat", + "input": {"message": "Hello"}, + "model": { + "main_model": { + "provider": "openai", + "model": "gpt-4", + "api_key": "sk-main-key" + }, + "mini_model": { + "provider": "openai", + "model": "gpt-4-mini", + "api_key": "sk-mini-key" + }, + "nano_model": { + "provider": "groq", + "model": "llama-3.1-8b-instant", + "api_key": "gsk-nano-key" + } + } + } +} +``` + +### Dynamic LLM Configuration + +The server supports runtime LLM configuration via the `configure_llm` JSON-RPC method: + +```json +{ + "jsonrpc": "2.0", + "method": "configure_llm", + "params": { + "provider": "openai|groq|openrouter|litellm", + "apiKey": "your-api-key", + "endpoint": "endpoint-url-for-litellm", + "models": { + "main": "main-model-name", + "mini": "mini-model-name", + "nano": "nano-model-name" + }, + "partial": false + }, + "id": "config-request-id" +} +``` + +### Configuration + +All configuration is managed through environment variables and `src/config.js`. 
Key settings: +- Server port and host +- OpenAI API configuration +- RPC timeouts +- Logging levels and directories +- Maximum concurrent evaluations \ No newline at end of file diff --git a/eval-server/nodejs/README.md b/eval-server/nodejs/README.md new file mode 100644 index 0000000..d29f9bc --- /dev/null +++ b/eval-server/nodejs/README.md @@ -0,0 +1,546 @@ +# bo-eval-server + +A library-first evaluation server for LLM agents with modular architecture and programmatic API. + +## Features + +- ๐Ÿ“š **Library-First Architecture**: Programmatic API for custom integrations +- ๐Ÿ”Œ **WebSocket Server**: Real-time agent connections (core) +- ๐ŸŒ **Optional HTTP API**: REST endpoints via separate wrapper +- ๐Ÿค– **Bidirectional RPC**: Call methods on connected agents +- โš–๏ธ **Optional LLM Judge**: GPT-4 evaluation (when configured) +- ๐Ÿ“Š **Structured Logging**: JSON logging of all evaluations +- ๐Ÿ–ฅ๏ธ **Interactive CLI**: Built-in management interface +- โšก **Concurrent Evaluations**: Multi-agent support +- โœจ **No Configuration Required**: Works without config files or API keys + +## Quick Start + +### Basic WebSocket Server + +```javascript +import { EvalServer } from 'bo-eval-server'; + +const server = new EvalServer({ + authKey: 'hello', + host: '127.0.0.1', + port: 8080 +}); + +server.onConnect(async client => { + console.log('Client connected:', client.id); + + const response = await client.evaluate({ + id: "test_eval", + name: "Capital of France", + tool: "chat", + input: { message: "What is the capital of France?" 
} + }); + + console.log('Response:', JSON.stringify(response, null, 2)); +}); + +await server.start(); +console.log('Server running on ws://127.0.0.1:8080'); +``` + +### With Optional HTTP API + +```javascript +import { EvalServer, HTTPWrapper } from 'bo-eval-server'; + +// Create core WebSocket server +const evalServer = new EvalServer({ + authKey: 'hello', + port: 8080 +}); + +// Add optional HTTP API wrapper +const httpWrapper = new HTTPWrapper(evalServer, { + port: 8081 +}); + +// Set up client connection handler +evalServer.onConnect(async client => { + // Handle evaluations... +}); + +// Start both servers +await evalServer.start(); +await httpWrapper.start(); + +console.log('WebSocket: ws://localhost:8080'); +console.log('HTTP API: http://localhost:8081'); +``` + +## Installation & Setup + +```bash +# Install dependencies +npm install + +# Run examples +npm start # Server with HTTP API +npm run lib:example # WebSocket-only server +npm run cli # Interactive CLI +npm run dev # Development mode +``` + +## Library Usage + +### Core EvalServer API + +The `EvalServer` class provides the core WebSocket-based evaluation server: + +```javascript +import { EvalServer } from 'bo-eval-server'; + +const server = new EvalServer({ + // Required + authKey: 'your-secret-key', // Client authentication key + + // Optional + host: '127.0.0.1', // Server host (default: 'localhost') + port: 8080, // Server port (default: 8080) + clientsDir: './clients', // Client config directory + evalsDir: './evals' // Evaluations directory +}); + +// Event handlers +server.onConnect(clientProxy => { + // Called when client connects and is ready +}); + +server.onDisconnect(clientInfo => { + // Called when client disconnects +}); + +// Server lifecycle +await server.start(); +await server.stop(); + +// Server status +console.log(server.getStatus()); +``` + +### Client Proxy API + +When a client connects, you receive a `ClientProxy` object: + +```javascript +server.onConnect(async client => { + 
// Client information + console.log('Client ID:', client.id); + console.log('Tab ID:', client.tabId); + console.log('Base Client ID:', client.baseClientId); + console.log('Info:', client.getInfo()); + + // Execute evaluations + const result = await client.evaluate({ + id: "eval_001", // Unique evaluation ID + name: "Test Evaluation", // Human-readable name + description: "Description", // Optional description + tool: "chat", // Tool to use: "chat", "action", etc. + input: { // Tool-specific input + message: "Your question here" + }, + timeout: 30000, // Optional timeout (ms) + model: { // Optional nested model config + main_model: { + provider: "openai", + model: "gpt-4", + api_key: "sk-..." + }, + mini_model: { + provider: "openai", + model: "gpt-4-mini", + api_key: "sk-..." + }, + nano_model: { + provider: "groq", + model: "llama-3.1-8b-instant", + api_key: "gsk-..." + } + }, + metadata: { // Optional metadata + tags: ['api', 'test'] + } + }); + + // Send custom messages + client.sendMessage({ + type: 'custom', + data: 'Hello client!' + }); +}); +``` + +### Advanced Usage with YAML Evaluations + +```javascript +import { EvalServer, EvaluationLoader } from 'bo-eval-server'; + +const server = new EvalServer({ + authKey: 'secret-key', + port: 8080 +}); + +// Load evaluations from YAML files +await server.loadEvaluations('./evals'); + +// Access evaluation loader +const loader = server.evaluationLoader; + +// Get evaluation statistics +const stats = loader.getStatistics(); +console.log('Total evaluations:', stats.total); +console.log('Categories:', stats.categories); + +// Filter evaluations +const chatEvals = loader.filterEvaluations({ + tool: 'chat', + enabled: true +}); + +const actionAgentEvals = loader.getEvaluationsByCategory('action-agent'); + +// Create custom evaluations +const customEval = loader.createEvaluation({ + name: 'Custom Test', + tool: 'chat', + input: { message: 'What is AI?' 
}, + metadata: { tags: ['custom'] } +}); + +server.onConnect(async client => { + // Run YAML-loaded evaluation + const result1 = await client.evaluate(chatEvals[0]); + + // Run custom evaluation + const result2 = await client.evaluate(customEval); + + console.log('Results:', { result1, result2 }); +}); + +await server.start(); +``` + +### Optional LLM Judge System + +```javascript +import { EvalServer, LLMJudge } from 'bo-eval-server'; + +const server = new EvalServer({ + authKey: 'secret-key', + port: 8080 +}); + +// Configure LLM judge (requires OPENAI_API_KEY) +if (process.env.OPENAI_API_KEY) { + const judge = new LLMJudge({ + apiKey: process.env.OPENAI_API_KEY, + model: 'gpt-4', + temperature: 0.1 + }); + + server.setJudge(judge); + + // Judge will automatically validate evaluation responses + // when evaluations have validation configuration +} + +await server.start(); +``` + +### Standalone Components + +#### EvaluationLoader + +```javascript +import { EvaluationLoader } from 'bo-eval-server/EvaluationLoader'; + +const loader = new EvaluationLoader('./evals'); + +// Load from directory +await loader.loadFromDirectory('./my-evals'); + +// Access evaluations +const all = loader.getAllEvaluations(); +const byCategory = loader.getEvaluationsByCategory('action-agent'); +const byId = loader.getEvaluationById('test-001'); + +// Filter evaluations +const filtered = loader.filterEvaluations({ + tool: 'chat', + enabled: true, + category: 'research-agent' +}); + +// Create evaluations programmatically +const custom = loader.createEvaluation({ + name: 'Custom Evaluation', + tool: 'chat', + input: { message: 'Hello world' } +}); +``` + +#### LLM Judge + +```javascript +import { LLMJudge } from 'bo-eval-server/judges/LLMJudge'; + +const judge = new LLMJudge({ + apiKey: 'your-openai-key', + model: 'gpt-4', + temperature: 0.1 +}); + +const evaluation = await judge.evaluate( + 'Summarize this article', + 'This article discusses...', + { + criteria: ['accuracy', 
'completeness', 'clarity'], + model: 'gpt-4' + } +); + +console.log('Score:', evaluation.score); +console.log('Reasoning:', evaluation.reasoning); +``` + +## HTTP API (Optional) + +The `HTTPWrapper` provides REST endpoints for integration with external systems: + +```javascript +import { EvalServer, HTTPWrapper } from 'bo-eval-server'; + +const evalServer = new EvalServer({ port: 8080 }); +const httpWrapper = new HTTPWrapper(evalServer, { + port: 8081, + host: 'localhost' +}); + +await evalServer.start(); +await httpWrapper.start(); + +// HTTP wrapper status +console.log(httpWrapper.getStatus()); +``` + +### HTTP Endpoints + +Once the HTTP wrapper is running, you can use these endpoints: + +```bash +# Server status +curl http://localhost:8081/status + +# List clients +curl http://localhost:8081/clients + +# Trigger evaluation +curl -X POST http://localhost:8081/evaluate \ + -H 'Content-Type: application/json' \ + -d '{"clientId": "client-123", "evaluationId": "eval-001"}' + +# OpenAI-compatible responses endpoint +curl -X POST http://localhost:8081/v1/responses \ + -H 'Content-Type: application/json' \ + -d '{"input": "What is 2+2?"}' +``` + +## CLI Usage + +Interactive command-line interface for server management: + +```bash +# Using npm scripts +npm run cli + +# Using the binary +npx eval-server + +# Or directly +node src/cli/index.js +``` + +The CLI provides commands for: +- Server management +- Client connections +- Evaluation execution +- Real-time monitoring + +## Agent Protocol + +Your agent needs to implement the WebSocket protocol: + +### 1. Connect to WebSocket +```javascript +const ws = new WebSocket('ws://localhost:8080'); +``` + +### 2. Send Registration +```javascript +ws.send(JSON.stringify({ + type: 'register', + clientId: 'your-client-id', + secretKey: 'your-secret-key', + capabilities: ['chat', 'action'] +})); +``` + +### 3. Send Ready Signal +```javascript +ws.send(JSON.stringify({ + type: 'ready' +})); +``` + +### 4. 
Handle RPC Calls +```javascript +ws.on('message', async (data) => { + const message = JSON.parse(data); + + if (message.jsonrpc === '2.0' && message.method === 'evaluate') { + // Handle evaluation request + const result = await handleEvaluation(message.params); + + // Send response + ws.send(JSON.stringify({ + jsonrpc: '2.0', + id: message.id, + result: result + })); + } +}); +``` + +## Architecture + +``` +src/ +โ”œโ”€โ”€ lib/ # Core library +โ”‚ โ”œโ”€โ”€ EvalServer.js # Main server class (WebSocket only) +โ”‚ โ”œโ”€โ”€ HTTPWrapper.js # Optional HTTP API wrapper +โ”‚ โ”œโ”€โ”€ EvaluationLoader.js # YAML evaluation loader +โ”‚ โ””โ”€โ”€ judges/ # Judge implementations +โ”‚ โ”œโ”€โ”€ Judge.js # Base judge interface +โ”‚ โ””โ”€โ”€ LLMJudge.js # LLM-based judge +โ”œโ”€โ”€ cli/ # CLI implementation +โ”‚ โ”œโ”€โ”€ CLI.js # CLI class +โ”‚ โ””โ”€โ”€ index.js # CLI entry point +โ”œโ”€โ”€ examples/ # Usage examples +โ”‚ โ”œโ”€โ”€ library-usage.js # Basic WebSocket-only example +โ”‚ โ””โ”€โ”€ with-http-wrapper.js # Example with HTTP API +โ””โ”€โ”€ [utilities] # Configuration, logging, etc. +``` + +### Design Principles + +- **Library-First**: Everything built as composable modules +- **Optional Components**: HTTP API, LLM Judge, YAML loading all optional +- **Clean Architecture**: No external dependencies for core functionality +- **Event-Driven**: React to client connections with callbacks +- **Programmatic**: Full control through code, no required config files + +## Examples + +### Example 1: Simple Chat Evaluation +```javascript +import { EvalServer } from 'bo-eval-server'; + +const server = new EvalServer({ authKey: 'test', port: 8080 }); + +server.onConnect(async client => { + const response = await client.evaluate({ + id: "chat_test", + name: "Simple Chat", + tool: "chat", + input: { message: "Hello, how are you?" 
} + }); + + console.log('Chat response:', response.output.response); +}); + +await server.start(); +``` + +### Example 2: Action Agent Evaluation +```javascript +import { EvalServer } from 'bo-eval-server'; + +const server = new EvalServer({ authKey: 'test', port: 8080 }); + +server.onConnect(async client => { + const response = await client.evaluate({ + id: "action_test", + name: "Click Button", + tool: "action", + input: { + objective: "Click the submit button on the form", + url: "https://example.com/form" + } + }); + + console.log('Action completed:', response.output.success); +}); + +await server.start(); +``` + +### Example 3: Batch Evaluations +```javascript +import { EvalServer } from 'bo-eval-server'; + +const server = new EvalServer({ authKey: 'test', port: 8080 }); + +// Load evaluations from YAML +await server.loadEvaluations('./evals'); + +server.onConnect(async client => { + const chatEvals = server.evaluationLoader.filterEvaluations({ + tool: 'chat', + enabled: true + }); + + // Run all chat evaluations + for (const evaluation of chatEvals.slice(0, 5)) { + try { + const result = await client.evaluate(evaluation); + console.log(`โœ… ${evaluation.name}: ${result.status}`); + } catch (error) { + console.log(`โŒ ${evaluation.name}: ${error.message}`); + } + } +}); + +await server.start(); +``` + +## Environment Variables + +```bash +# Optional - only needed if using LLM Judge +OPENAI_API_KEY=your-openai-api-key + +# Optional - server configuration +PORT=8080 +HOST=localhost +LOG_LEVEL=info +LOG_DIR=./logs + +# Optional - RPC configuration +RPC_TIMEOUT=1500000 +MAX_CONCURRENT_EVALUATIONS=10 +``` + +## Documentation + +- **[CLAUDE.md](./CLAUDE.md)** - Architecture and implementation details +- **[docs/](./docs/)** - Protocol specifications and setup guides +- **[examples/](./examples/)** - Working code examples + +--- + +The library provides a clean, modular architecture for building custom evaluation workflows with LLM agents. 
\ No newline at end of file diff --git a/eval-server/nodejs/clients/1233ae25-9f9e-4f77-924d-865f7d615cef.yaml b/eval-server/nodejs/clients/1233ae25-9f9e-4f77-924d-865f7d615cef.yaml new file mode 100644 index 0000000..f5b865f --- /dev/null +++ b/eval-server/nodejs/clients/1233ae25-9f9e-4f77-924d-865f7d615cef.yaml @@ -0,0 +1,12 @@ +client: + id: 1233ae25-9f9e-4f77-924d-865f7d615cef + name: DevTools Client 1233ae25 + secret_key: hello + description: Auto-generated DevTools evaluation client +settings: + max_concurrent_evaluations: 3 + default_timeout: 45000 + retry_policy: + max_retries: 2 + backoff_multiplier: 2 + initial_delay: 1000 diff --git a/eval-server/nodejs/docs/CLIENT_SETUP.md b/eval-server/nodejs/docs/CLIENT_SETUP.md new file mode 100644 index 0000000..53502ae --- /dev/null +++ b/eval-server/nodejs/docs/CLIENT_SETUP.md @@ -0,0 +1,445 @@ +# Client Setup Guide + +## Overview + +This guide explains how to set up a new evaluation client to connect to the evaluation server. Clients can be any application that implements the WebSocket evaluation protocol, such as Chrome DevTools or custom test agents. + +## Prerequisites + +- WebSocket client library +- JSON-RPC 2.0 implementation +- UUID v4 generator +- Tools/agents to execute evaluations + +## Setup Steps + +### 1. Generate Client ID + +Generate a unique UUID v4 for your client: + +```javascript +// JavaScript example +import { v4 as uuidv4 } from 'uuid'; +const clientId = uuidv4(); // e.g., "550e8400-e29b-41d4-a716-446655440000" +``` + +Store this ID persistently - it will be used for all connections. + +### 2. Request YAML Configuration + +Contact the evaluation server administrator to: +1. Create a YAML evaluation file for your client ID +2. Optionally set up a secret key for authentication +3. 
Configure appropriate evaluations for your client + +Example request: +``` +Client ID: 550e8400-e29b-41d4-a716-446655440000 +Client Name: Chrome DevTools Production +Tools Available: extract_schema_data, research_agent, action_agent +Purpose: Automated regression testing +``` + +### 3. Implement WebSocket Connection + +```javascript +class EvaluationClient { + constructor(serverUrl, clientId, secretKey) { + this.serverUrl = serverUrl; + this.clientId = clientId; + this.secretKey = secretKey; + this.ws = null; + } + + connect() { + this.ws = new WebSocket(this.serverUrl); + + this.ws.onopen = () => { + console.log('Connected to evaluation server'); + }; + + this.ws.onmessage = (event) => { + this.handleMessage(JSON.parse(event.data)); + }; + + this.ws.onerror = (error) => { + console.error('WebSocket error:', error); + }; + } +} +``` + +### 4. Implement Protocol Messages + +#### Handle Welcome Message +```javascript +handleMessage(message) { + switch (message.type) { + case 'welcome': + // Server is ready, send registration + this.register(); + break; + + case 'registration_ack': + if (message.status === 'accepted') { + console.log(`Registered! ${message.evaluationsCount} evaluations assigned`); + this.sendReady(); + } else { + console.error('Registration rejected:', message.reason); + } + break; + + default: + // Handle other messages... + } +} +``` + +#### Send Registration +```javascript +register() { + this.send({ + type: 'register', + clientId: this.clientId, + secretKey: this.secretKey, // Optional + capabilities: { + tools: ['extract_schema_data', 'research_agent'], + maxConcurrency: 3, + version: '1.0.0' + } + }); +} +``` + +#### Send Ready Signal +```javascript +sendReady() { + this.send({ + type: 'ready', + timestamp: new Date().toISOString() + }); +} +``` + +### 5. Implement RPC Handler + +```javascript +handleMessage(message) { + // ... existing code ... 
+ + // Handle JSON-RPC requests + if (message.jsonrpc === '2.0' && message.method) { + this.handleRpcRequest(message); + } +} + +async handleRpcRequest(request) { + if (request.method === 'evaluate') { + try { + const result = await this.executeEvaluation(request.params); + + this.send({ + jsonrpc: '2.0', + result: { + status: 'success', + output: result.output, + executionTime: result.duration, + toolCalls: result.toolCalls, + metadata: result.metadata + }, + id: request.id + }); + } catch (error) { + this.send({ + jsonrpc: '2.0', + error: { + code: -32000, + message: error.message, + data: { + tool: request.params.tool, + error: error.toString(), + timestamp: new Date().toISOString() + } + }, + id: request.id + }); + } + } +} +``` + +### 6. Implement Tool Execution + +```javascript +async executeEvaluation(params) { + const startTime = Date.now(); + + // Send status update + this.send({ + type: 'status', + evaluationId: params.evaluationId, + status: 'running', + progress: 0.1, + message: 'Starting evaluation...' + }); + + // Execute the appropriate tool + let result; + switch (params.tool) { + case 'extract_schema_data': + result = await this.extractSchema(params.url, params.input); + break; + + case 'research_agent': + result = await this.runResearchAgent(params.url, params.input); + break; + + default: + throw new Error(`Unknown tool: ${params.tool}`); + } + + const executionTime = Date.now() - startTime; + + return { + output: result, + duration: executionTime, + toolCalls: [{ + tool: params.tool, + timestamp: new Date().toISOString(), + duration: executionTime, + status: 'success' + }], + metadata: { + url: params.url, + toolVersion: '1.0.0' + } + }; +} +``` + +## Chrome DevTools Integration + +For Chrome DevTools specifically: + +### 1. 
Update EvaluationConfig + +```typescript +// In EvaluationConfig.ts +interface EvaluationConfiguration { + enabled: boolean; + endpoint: string; + secretKey?: string; + clientId?: string; // Add client ID field +} + +// Generate and store client ID +function ensureClientId(): string { + let clientId = localStorage.getItem('ai_chat_evaluation_client_id'); + if (!clientId) { + clientId = generateUUID(); + localStorage.setItem('ai_chat_evaluation_client_id', clientId); + } + return clientId; +} +``` + +### 2. Create Evaluation Agent + +```typescript +// EvaluationAgent.ts +import { WebSocketRPCClient } from '../common/WebSocketRPCClient.js'; +import { ToolRegistry } from '../agent_framework/ConfigurableAgentTool.js'; + +export class EvaluationAgent { + private client: WebSocketRPCClient; + private clientId: string; + + constructor(config: EvaluationConfiguration) { + this.clientId = config.clientId || ensureClientId(); + this.client = new WebSocketRPCClient({ + endpoint: config.endpoint, + secretKey: config.secretKey + }); + + this.setupHandlers(); + } + + private setupHandlers(): void { + this.client.on('connected', () => { + this.register(); + }); + + // Handle RPC requests + this.client.on('rpc-request', async (request) => { + if (request.method === 'evaluate') { + const result = await this.handleEvaluation(request.params); + return result; + } + }); + } + + private async handleEvaluation(params: any): Promise<any> { + const startTime = Date.now(); + + const tool = ToolRegistry.getRegisteredTool(params.tool); + if (!tool) { + throw new Error(`Tool not found: ${params.tool}`); + } + + // Execute tool with params.input + const result = await tool.execute(params.input); + + return { + status: 'success', + output: result, + executionTime: Date.now() - startTime + }; + } +} +``` + +## Testing Your Client + +### 1. Local Testing + +Use the example agent to test your server setup: + +```bash +# In bo-eval-server directory +npm test +``` + +### 2. 
Connection Test + +```javascript +// Quick connection test +const client = new EvaluationClient( + 'ws://localhost:8080', + 'your-client-id', + 'optional-secret' +); + +client.connect(); + +// Should see: +// Connected to evaluation server +// Registered! X evaluations assigned +``` + +### 3. Manual Evaluation Test + +You can trigger evaluations manually through the server's CLI: + +```bash +npm run cli +> run-evaluation your-client-id evaluation-id +``` + +## Troubleshooting + +### Connection Issues + +1. **Check server is running** + ```bash + curl -i -N -H "Connection: Upgrade" -H "Upgrade: websocket" http://localhost:8080 + ``` + +2. **Verify client ID exists** + - Check `clients/{your-client-id}.yaml` exists on server + - Ensure client ID format is valid UUID v4 + +3. **Authentication failures** + - Verify secret key matches server configuration + - Check for typos in client ID or secret + +### Evaluation Failures + +1. **Tool not found** + - Ensure tool name in YAML matches client capabilities + - Verify tool is registered in your client + +2. **Timeouts** + - Increase timeout in YAML configuration + - Check for infinite loops in tool execution + +3. **Invalid input** + - Validate input against expected schema + - Check for required fields + +## Security Best Practices + +1. **Store credentials securely** + - Never hardcode secret keys + - Use environment variables or secure storage + +2. **Validate inputs** + - Sanitize URLs before navigation + - Validate schemas before execution + +3. **Resource limits** + - Implement timeout handling + - Limit concurrent evaluations + +4. 
**Use WSS in production** + ```javascript + const client = new EvaluationClient( + 'wss://eval-server.example.com', // Use WSS + clientId, + secretKey + ); + ``` + +## Example: Minimal Client + +```javascript +// minimal-client.js +import WebSocket from 'ws'; + +const CLIENT_ID = 'your-uuid-here'; +const SECRET_KEY = 'your-secret-here'; + +const ws = new WebSocket('ws://localhost:8080'); + +ws.on('open', () => { + console.log('Connected'); +}); + +ws.on('message', async (data) => { + const msg = JSON.parse(data); + + if (msg.type === 'welcome') { + // Register + ws.send(JSON.stringify({ + type: 'register', + clientId: CLIENT_ID, + secretKey: SECRET_KEY, + capabilities: { + tools: ['extract_schema_data'], + maxConcurrency: 1, + version: '1.0.0' + } + })); + } + + if (msg.type === 'registration_ack' && msg.status === 'accepted') { + // Send ready + ws.send(JSON.stringify({ + type: 'ready', + timestamp: new Date().toISOString() + })); + } + + if (msg.jsonrpc && msg.method === 'evaluate') { + // Simple evaluation response + ws.send(JSON.stringify({ + jsonrpc: '2.0', + result: { + status: 'success', + output: { message: 'Evaluation completed' }, + executionTime: 1000 + }, + id: msg.id + })); + } +}); + +ws.on('error', console.error); +``` \ No newline at end of file diff --git a/eval-server/nodejs/docs/PROTOCOL.md b/eval-server/nodejs/docs/PROTOCOL.md new file mode 100644 index 0000000..694e58a --- /dev/null +++ b/eval-server/nodejs/docs/PROTOCOL.md @@ -0,0 +1,310 @@ +# WebSocket Evaluation Protocol + +## Overview + +This document describes the WebSocket communication protocol between evaluation clients (e.g., Chrome DevTools) and the evaluation server. The protocol supports client registration, authentication, and bidirectional evaluation task execution using JSON-RPC 2.0. 
+ +## Connection Flow + +``` +Client Server + | | + |------ WebSocket Connect ------>| + | | + |<----- Welcome Message ---------| + | | + |------ Register Message ------->| + | | + |<----- Registration ACK ---------| + | | + |------ Ready Signal ----------->| + | | + |<===== Evaluation Loop ========>| +``` + +## Message Types + +### 1. Client โ†’ Server Messages + +#### 1.1 Registration Message +Sent immediately after receiving the welcome message to register the client with the server. + +```json +{ + "type": "register", + "clientId": "550e8400-e29b-41d4-a716-446655440000", + "secretKey": "optional-secret-key", // Optional field for authentication + "capabilities": { + "tools": ["extract_schema_data", "research_agent", "action_agent"], + "maxConcurrency": 3, + "version": "1.0.0" + } +} +``` + +**Fields:** +- `type`: Must be "register" +- `clientId`: UUID v4 format, unique identifier for the client +- `secretKey`: Optional authentication key +- `capabilities`: Object describing client capabilities + - `tools`: Array of tool names the client can execute + - `maxConcurrency`: Maximum number of concurrent evaluations + - `version`: Client version string + +#### 1.2 Ready Signal +Indicates the client is ready to receive evaluation tasks. + +```json +{ + "type": "ready", + "timestamp": "2024-01-01T00:00:00Z" +} +``` + +#### 1.3 Status Update +Provides progress updates for running evaluations. + +```json +{ + "type": "status", + "evaluationId": "eval-123", + "status": "running" | "completed" | "failed", + "progress": 0.5, // Optional, value between 0 and 1 + "message": "Processing page content..." // Optional status message +} +``` + +#### 1.4 Heartbeat (Ping) +Keep-alive message to maintain connection. + +```json +{ + "type": "ping", + "timestamp": "2024-01-01T00:00:00Z" +} +``` + +### 2. Server โ†’ Client Messages + +#### 2.1 Welcome Message +Sent immediately after WebSocket connection is established. 
+ +```json +{ + "type": "welcome", + "serverId": "server-001", + "version": "1.0.0", + "timestamp": "2024-01-01T00:00:00Z" +} +``` + +#### 2.2 Registration Acknowledgment +Response to client registration. + +```json +{ + "type": "registration_ack", + "clientId": "550e8400-e29b-41d4-a716-446655440000", + "status": "accepted" | "rejected", + "message": "Client registered successfully", + "evaluationsCount": 5, // Number of evaluations assigned to this client + "reason": "Invalid secret key" // Only present if status is "rejected" +} +``` + +#### 2.3 Heartbeat Response (Pong) +Response to client ping. + +```json +{ + "type": "pong", + "timestamp": "2024-01-01T00:00:00Z" +} +``` + +## JSON-RPC 2.0 Evaluation Protocol + +The evaluation tasks are sent using JSON-RPC 2.0 protocol over the WebSocket connection. + +### 3. Evaluation Request (Server โ†’ Client) + +#### 3.1 Evaluate Method +Requests the client to execute an evaluation task. + +```json +{ + "jsonrpc": "2.0", + "method": "evaluate", + "params": { + "evaluationId": "wikipedia-chrome-devtools-001", + "name": "Extract Chrome DevTools Wikipedia Article", + "url": "https://en.wikipedia.org/wiki/Chrome_DevTools", + "tool": "extract_schema_data", + "input": { + "schema": { + "type": "object", + "properties": { + "title": {"type": "string"}, + "summary": {"type": "string"}, + "tableOfContents": { + "type": "array", + "items": {"type": "string"} + } + } + } + }, + "timeout": 30000, // Timeout in milliseconds + "metadata": { + "tags": ["schema-extraction", "wikipedia"], + "retries": 2, + "priority": "normal" + } + }, + "id": "rpc-001" +} +``` + +**Parameters:** +- `evaluationId`: Unique identifier for this evaluation (from YAML definition) +- `name`: Human-readable name of the evaluation +- `url`: Target URL for the evaluation +- `tool`: Name of the tool to execute +- `input`: Tool-specific input parameters +- `timeout`: Maximum execution time in milliseconds +- `metadata`: Additional evaluation metadata + +### 4. 
Evaluation Response (Client โ†’ Server) + +#### 4.1 Success Response +Sent when evaluation completes successfully. + +```json +{ + "jsonrpc": "2.0", + "result": { + "status": "success", + "output": { + "title": "Chrome DevTools", + "summary": "Chrome DevTools is a set of web developer tools built directly into the Google Chrome browser.", + "tableOfContents": [ + "Overview", + "Features", + "History", + "Usage" + ] + }, + "executionTime": 2500, // Total execution time in milliseconds + "toolCalls": [ + { + "tool": "extract_schema_data", + "timestamp": "2024-01-01T00:00:00Z", + "duration": 2400, + "status": "success" + } + ], + "metadata": { + "pageLoadTime": 800, + "extractionTime": 1700, + "retryCount": 0 + } + }, + "id": "rpc-001" +} +``` + +#### 4.2 Error Response +Sent when evaluation fails. + +```json +{ + "jsonrpc": "2.0", + "error": { + "code": -32000, + "message": "Tool execution failed", + "data": { + "tool": "extract_schema_data", + "error": "Page load timeout after 30000ms", + "url": "https://en.wikipedia.org/wiki/Chrome_DevTools", + "timestamp": "2024-01-01T00:00:00Z", + "stackTrace": "Error: Timeout...\n at PageLoader.load..." 
// Optional + } + }, + "id": "rpc-001" +} +``` + +## Error Codes + +Standard JSON-RPC 2.0 error codes: +- `-32700`: Parse error - Invalid JSON was received +- `-32600`: Invalid request - JSON is not a valid request object +- `-32601`: Method not found - Method does not exist +- `-32602`: Invalid params - Invalid method parameters +- `-32603`: Internal error - Internal JSON-RPC error + +Custom error codes for evaluation: +- `-32000`: Tool execution error - Tool failed during execution +- `-32001`: Timeout error - Evaluation exceeded timeout +- `-32002`: Authentication error - Invalid or missing credentials +- `-32003`: Rate limit exceeded - Too many requests +- `-32004`: Invalid tool - Requested tool not available +- `-32005`: Resource error - Unable to access required resources + +## Connection Management + +### Reconnection +- Clients should implement automatic reconnection with exponential backoff +- On reconnection, clients must re-register with the same clientId +- Server maintains evaluation state across reconnections + +### Timeouts +- Default connection timeout: 60 seconds +- Ping interval: 30 seconds +- Evaluation timeout: Specified per evaluation in YAML + +### Rate Limiting +- Server may implement rate limiting per client +- Rate limit errors use code `-32003` +- Clients should respect rate limit headers in error responses + +## Security Considerations + +1. **Authentication**: Clients may use optional secret keys for authentication +2. **Transport Security**: Production deployments should use WSS (WebSocket Secure) +3. **Input Validation**: All inputs should be validated against schemas +4. **Resource Limits**: Enforce timeouts and memory limits for evaluations + +## Examples + +### Complete Flow Example + +1. 
**Client connects and registers:** +```json +// Client โ†’ Server +{"type": "register", "clientId": "550e8400-e29b-41d4-a716-446655440000", "capabilities": {"tools": ["extract_schema_data"], "maxConcurrency": 3, "version": "1.0.0"}} + +// Server โ†’ Client +{"type": "registration_ack", "clientId": "550e8400-e29b-41d4-a716-446655440000", "status": "accepted", "message": "Client registered successfully", "evaluationsCount": 2} +``` + +2. **Client signals ready:** +```json +// Client โ†’ Server +{"type": "ready", "timestamp": "2024-01-01T00:00:00Z"} +``` + +3. **Server sends evaluation:** +```json +// Server โ†’ Client +{"jsonrpc": "2.0", "method": "evaluate", "params": {"evaluationId": "test-001", "url": "https://example.com", "tool": "extract_schema_data", "input": {"schema": {"type": "object", "properties": {"title": {"type": "string"}}}}, "timeout": 30000}, "id": "rpc-001"} +``` + +4. **Client returns result:** +```json +// Client โ†’ Server +{"jsonrpc": "2.0", "result": {"status": "success", "output": {"title": "Example Domain"}, "executionTime": 1500}, "id": "rpc-001"} +``` + +## Version History + +- **1.0.0** (2024-01-01): Initial protocol version \ No newline at end of file diff --git a/eval-server/nodejs/docs/TRIGGERING_EVALUATIONS.md b/eval-server/nodejs/docs/TRIGGERING_EVALUATIONS.md new file mode 100644 index 0000000..4dd0078 --- /dev/null +++ b/eval-server/nodejs/docs/TRIGGERING_EVALUATIONS.md @@ -0,0 +1,306 @@ +# How to Trigger Evaluations + +This guide explains all the different ways to trigger evaluations in the system. + +## Prerequisites + +1. **Server Running**: Make sure the evaluation server is running: + ```bash + npm start + ``` + +2. **Client Connected**: A DevTools client must be connected and ready. 
You'll see logs like: + ``` + [info]: Client registered successfully {"clientId":"550e8400...","capabilities":"extract_schema_data, research_agent"} + [info]: Client ready for evaluations {"clientId":"550e8400..."} + ``` + +## Method 1: Interactive CLI + +Start the interactive CLI: +```bash +npm run cli +``` + +### Available Commands + +#### List Clients and Evaluations +```bash +eval-server> clients +``` +This shows all registered clients and their available evaluations with current status. + +#### Run Specific Evaluation +```bash +eval-server> run <clientId> <evaluationId> +``` +Example: +```bash +eval-server> run 550e8400-e29b-41d4-a716-446655440000 wikipedia-chrome-devtools-001 +``` + +#### Run All Evaluations for a Client +```bash +eval-server> run-all <clientId> +``` +Example: +```bash +eval-server> run-all 550e8400-e29b-41d4-a716-446655440000 +``` + +#### Check Status +```bash +eval-server> status +``` +Shows server status, connected clients, and active evaluations. + +#### Get Help +```bash +eval-server> help +``` + +## Method 2: HTTP API + +The server also exposes an HTTP API on port 8081. 
+ +### Get Server Status +```bash +curl http://localhost:8081/status +``` + +### List All Clients +```bash +curl http://localhost:8081/clients +``` + +### Get Client Evaluations +```bash +curl "http://localhost:8081/clients/:id/evaluations?id=550e8400-e29b-41d4-a716-446655440000" +``` + +### Trigger Specific Evaluation +```bash +curl -X POST http://localhost:8081/evaluate \\ + -H "Content-Type: application/json" \\ + -d '{ + "clientId": "550e8400-e29b-41d4-a716-446655440000", + "evaluationId": "wikipedia-chrome-devtools-001" + }' +``` + +### Trigger All Evaluations for a Client +```bash +curl -X POST http://localhost:8081/evaluate \\ + -H "Content-Type: application/json" \\ + -d '{ + "clientId": "550e8400-e29b-41d4-a716-446655440000", + "runAll": true + }' +``` + + +## Method 3: Programmatic Integration + +You can integrate the evaluation system into your own applications: + +### Node.js Example +```javascript +import { EvaluationServer } from './src/server.js'; + +const server = new EvaluationServer(); +server.start(); + +// Wait for client to connect +setTimeout(async () => { + const clientId = '550e8400-e29b-41d4-a716-446655440000'; + const evaluationId = 'wikipedia-chrome-devtools-001'; + + // Get client connection + const connection = server.connectedAgents.get(clientId); + if (connection && connection.ready) { + // Get evaluation + const evaluation = server.getClientManager() + .getClientEvaluations(clientId) + .find(e => e.id === evaluationId); + + if (evaluation) { + // Execute evaluation + await server.executeEvaluation(connection, evaluation); + console.log('Evaluation completed!'); + } + } +}, 5000); +``` + +### Python Example (using HTTP API) +```python +import requests +import json + +def trigger_evaluation(client_id, evaluation_id): + response = requests.post('http://localhost:8081/evaluate', + headers={'Content-Type': 'application/json'}, + json={ + 'clientId': client_id, + 'evaluationId': evaluation_id + }) + + if response.status_code == 200: + 
return response.json() + else: + raise Exception(f"Failed to trigger evaluation: {response.text}") + +# Example usage +result = trigger_evaluation( + '550e8400-e29b-41d4-a716-446655440000', + 'wikipedia-chrome-devtools-001' +) +print(json.dumps(result, indent=2)) +``` + +## Method 4: Webhook Integration + +You can set up webhooks to trigger evaluations from external systems: + +### GitHub Actions Example +```yaml +name: Run Evaluations +on: + schedule: + - cron: '0 9 * * *' # Daily at 9 AM + workflow_dispatch: # Manual trigger + +jobs: + evaluate: + runs-on: ubuntu-latest + steps: + - name: Trigger Evaluation + run: | + curl -X POST ${{ secrets.EVAL_SERVER_URL }}/evaluate \\ + -H "Content-Type: application/json" \\ + -d '{ + "clientId": "${{ secrets.CLIENT_ID }}", + "runAll": true + }' +``` + +### Slack Bot Example +```javascript +// Slack bot command: /eval wikipedia +app.command('/eval', async ({ command, ack, respond }) => { + await ack(); + + const evaluationId = command.text.trim(); + const clientId = process.env.DEFAULT_CLIENT_ID; + + try { + const response = await fetch('http://localhost:8081/evaluate', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ clientId, evaluationId }) + }); + + const result = await response.json(); + await respond(`โœ… Evaluation '${evaluationId}' completed successfully!`); + } catch (error) { + await respond(`โŒ Evaluation failed: ${error.message}`); + } +}); +``` + +## Monitoring Evaluation Results + +### Real-time Logs +Monitor the server logs to see evaluation progress: +```bash +tail -f logs/combined.log +``` + +### Status Checking +Check evaluation status via API: +```bash +# Get all evaluations for a client +curl "http://localhost:8081/clients/:id/evaluations?id=CLIENT_ID" + +# Check server status +curl http://localhost:8081/status +``` + +### Log Files +Evaluation results are logged to: +- `logs/combined.log` - All logs +- `logs/error.log` - Error logs only + +## 
Troubleshooting + +### Client Not Connected +``` +โŒ Client 'CLIENT_ID' is not connected or not ready +``` +**Solutions:** +1. Make sure DevTools is running and connected +2. Check that the client ID matches +3. Verify the WebSocket connection is working + +### Evaluation Not Found +``` +โŒ Evaluation 'EVAL_ID' not found for client 'CLIENT_ID' +``` +**Solutions:** +1. Check the YAML file for the correct evaluation ID +2. Ensure the evaluation is enabled (`enabled: true`) +3. Reload the server if you changed the YAML file + +### Tool Not Available +``` +Tool execution failed: Tool not found: tool_name +``` +**Solutions:** +1. Verify the tool is registered in DevTools +2. Check that the tool name matches exactly +3. Ensure DevTools has the required capabilities + +### Connection Timeout +``` +WebSocket connection failed +``` +**Solutions:** +1. Check if the server is running on the correct port +2. Verify firewall settings +3. Check network connectivity + +## Best Practices + +1. **Start Simple**: Begin with manual evaluations before setting up automation +2. **Monitor Logs**: Always monitor logs when running evaluations +3. **Test Connections**: Use the `status` command to verify everything is connected +4. **Gradual Rollout**: Test individual evaluations before running batch operations +5. **Error Handling**: Implement proper error handling in automated systems +6. **Rate Limiting**: Don't run too many evaluations simultaneously + +## Example Workflow + +Here's a typical workflow for triggering evaluations: + +```bash +# 1. Start the server +npm start + +# 2. In another terminal, start the CLI +npm run cli + +# 3. Check status and clients +eval-server> status +eval-server> clients + +# 4. Run a specific evaluation +eval-server> run 550e8400-e29b-41d4-a716-446655440000 wikipedia-chrome-devtools-001 + +# 5. Check results in logs +# (Monitor the server logs for detailed results) + +# 6. 
Run all evaluations if needed +eval-server> run-all 550e8400-e29b-41d4-a716-446655440000 +``` + +This comprehensive guide covers all the ways to trigger and monitor evaluations in your system! \ No newline at end of file diff --git a/eval-server/nodejs/docs/YAML_SCHEMA.md b/eval-server/nodejs/docs/YAML_SCHEMA.md new file mode 100644 index 0000000..ea15dcd --- /dev/null +++ b/eval-server/nodejs/docs/YAML_SCHEMA.md @@ -0,0 +1,315 @@ +# YAML Evaluation Schema Documentation + +## Overview + +This document describes the YAML schema used to define evaluations for each client. Each client has a dedicated YAML file stored in the `clients/` directory, named after their client ID. + +## File Location + +``` +bo-eval-server/ +โ””โ”€โ”€ clients/ + โ”œโ”€โ”€ 550e8400-e29b-41d4-a716-446655440000.yaml + โ”œโ”€โ”€ 771f9500-f39c-52e5-b827-557766551111.yaml + โ””โ”€โ”€ ... +``` + +## Schema Structure + +### Root Level + +```yaml +# Client identification and authentication +client: + id: "550e8400-e29b-41d4-a716-446655440000" # Required: UUID v4 + name: "Chrome DevTools Agent" # Required: Human-readable name + secret_key: "optional-secret-key" # Optional: Authentication key + description: "Production DevTools instance" # Optional: Client description + +# Client-specific settings +settings: + max_concurrent_evaluations: 3 # Maximum parallel evaluations + default_timeout: 30000 # Default timeout in milliseconds + retry_policy: + max_retries: 2 # Maximum retry attempts + backoff_multiplier: 2 # Exponential backoff multiplier + initial_delay: 1000 # Initial retry delay in ms + +# List of evaluations assigned to this client +evaluations: + - id: "eval-001" + # ... evaluation definition + - id: "eval-002" + # ... 
evaluation definition +``` + +### Evaluation Definition + +Each evaluation in the `evaluations` array follows this structure: + +```yaml +- id: "wikipedia-chrome-devtools-001" # Required: Unique evaluation ID + name: "Extract Chrome DevTools Wikipedia" # Required: Display name + description: "Extract structured data" # Optional: Detailed description + enabled: true # Optional: Enable/disable (default: true) + + # Target configuration + target: + url: "https://en.wikipedia.org/wiki/Chrome_DevTools" # Required: Target URL + wait_for: "networkidle" # Optional: Wait condition (load|domcontentloaded|networkidle) + wait_timeout: 5000 # Optional: Wait timeout in ms + + # Tool configuration + tool: "extract_schema_data" # Required: Tool to execute + timeout: 30000 # Optional: Override default timeout + + # Tool-specific input + input: + schema: # For extract_schema_data tool + type: "object" + properties: + title: + type: "string" + summary: + type: "string" + + + # Validation configuration + validation: + type: "llm-judge" # llm-judge|snapshot|hybrid + + # For llm-judge validation + llm_judge: + model: "gpt-4o-mini" # LLM model to use + temperature: 0.3 # Model temperature + criteria: # Evaluation criteria + - "Title should be accurately extracted" + - "Summary should be comprehensive" + - "All required fields should be present" + + # Visual verification settings + visual_verification: + enabled: true + capture_before: true # Screenshot before tool execution + capture_after: true # Screenshot after tool execution + prompts: # Custom verification prompts + - "Verify the title matches the page header" + + # For snapshot validation + snapshot: + structure_only: false # Compare structure only + exclude_paths: # Paths to exclude from comparison + - "timestamp" + - "random_id" + sanitizers: # Value sanitization rules + - path: "date" + pattern: "\\d{4}-\\d{2}-\\d{2}" + replacement: "YYYY-MM-DD" + + # For hybrid validation (both llm-judge and snapshot) + hybrid: + weight_llm: 
0.7 # Weight for LLM score + weight_snapshot: 0.3 # Weight for snapshot score + + # Metadata and tags + metadata: + tags: # Categorization tags + - "schema-extraction" + - "wikipedia" + - "regression" + priority: "normal" # low|normal|high + owner: "team-browser" # Responsible team/person + created: "2024-01-01" # Creation date + modified: "2024-01-15" # Last modification date +``` + +## Tool-Specific Input Schemas + +### extract_schema_data + +```yaml +input: + schema: # JSON Schema for extraction + type: "object" + properties: + title: + type: "string" + items: + type: "array" + items: + type: "object" + properties: + name: + type: "string" + price: + type: "number" +``` + +### research_agent + +```yaml +input: + query: "Research the latest AI developments" # Research query + max_iterations: 5 # Maximum agent iterations + include_sources: true # Include source URLs + depth: "comprehensive" # shallow|moderate|comprehensive +``` + +### action_agent + +```yaml +input: + task: "Fill out the contact form" # Task description + form_data: # Data to use + name: "Test User" + email: "test@example.com" + verify_completion: true # Verify task completion +``` + +### web_task_agent + +```yaml +input: + instructions: | # Multi-line instructions + 1. Navigate to the products page + 2. Search for "laptop" + 3. Filter by price < $1000 + 4. 
Extract the first 5 results + expected_outcome: "List of laptops under $1000" + max_steps: 10 # Maximum action steps +``` + +## Complete Example + +```yaml +client: + id: "550e8400-e29b-41d4-a716-446655440000" + name: "Chrome DevTools Production Agent" + secret_key: "sk-prod-abc123" + description: "Production DevTools instance for continuous evaluation" + +settings: + max_concurrent_evaluations: 5 + default_timeout: 45000 + retry_policy: + max_retries: 3 + backoff_multiplier: 2 + initial_delay: 2000 + +evaluations: + # Schema extraction evaluation + - id: "schema-extract-wiki-001" + name: "Wikipedia Chrome DevTools Schema Extraction" + description: "Test schema extraction on Wikipedia article" + enabled: true + + target: + url: "https://en.wikipedia.org/wiki/Chrome_DevTools" + wait_for: "networkidle" + wait_timeout: 5000 + + tool: "extract_schema_data" + timeout: 30000 + + input: + schema: + type: "object" + properties: + title: + type: "string" + summary: + type: "string" + features: + type: "array" + items: + type: "string" + lastModified: + type: "string" + + + validation: + type: "hybrid" + llm_judge: + model: "gpt-4o" + criteria: + - "All schema fields must be populated" + - "Summary should be at least 100 characters" + - "Features should contain at least 5 items" + snapshot: + exclude_paths: + - "lastModified" + hybrid: + weight_llm: 0.6 + weight_snapshot: 0.4 + + metadata: + tags: ["schema", "wikipedia", "daily"] + priority: "high" + owner: "qa-team" + + # Research agent evaluation + - id: "research-agent-news-001" + name: "Research Latest Tech News" + description: "Test research agent on current tech news" + enabled: true + + target: + url: "https://news.ycombinator.com" + + tool: "research_agent" + timeout: 60000 + + input: + query: "What are the top 3 technology stories today?" 
+ max_iterations: 5 + include_sources: true + depth: "moderate" + + + validation: + type: "llm-judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Response includes 3 distinct technology stories" + - "Each story has a clear summary" + - "Sources are provided for each story" + - "Information is current (from today)" + + metadata: + tags: ["research", "news", "tech"] + priority: "normal" +``` + +## Validation Rules + +1. **Client ID**: Must be valid UUID v4 format +2. **Evaluation IDs**: Must be unique within the file +3. **Tool names**: Must match registered tools in the client +4. **URLs**: Must be valid HTTP/HTTPS URLs +5. **Timeouts**: Must be positive integers (milliseconds) + +## YAML Best Practices + +1. Use meaningful IDs that describe the evaluation +2. Group related evaluations together +3. Use tags consistently for categorization +4. Document complex input schemas with comments +5. Keep validation criteria specific and measurable +6. Use anchors and aliases for repeated configurations: + +```yaml +# Define anchor +defaults: &defaults + timeout: 30000 + retry_policy: + max_retries: 2 + +# Use alias +evaluations: + - id: "eval-001" + <<: *defaults # Inherits timeout and retry_policy + name: "Test 1" + # ... 
+``` \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/a11y-001.yaml b/eval-server/nodejs/evals/action-agent/a11y-001.yaml new file mode 100644 index 0000000..7c7947a --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/a11y-001.yaml @@ -0,0 +1,46 @@ +# Accessibility action test +id: "a11y-001" +name: "Click Using ARIA Label" +description: "Test clicking an element identified primarily by ARIA attributes" +enabled: true + +target: + url: "https://www.w3.org/WAI/ARIA/apg/patterns/button/examples/button/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click the button with aria-label \"Print Page\"" + reasoning: "Testing action selection using accessibility attributes" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Used accessibility tree to find elements" + - "Correctly identified element by ARIA label" + - "Successfully clicked the target button" + - "Demonstrated understanding of accessibility attributes" + - "No reliance on visual appearance alone" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the Print Page button was successfully clicked" + - "Check if any print dialog or print preview appeared" + - "Confirm the button showed visual feedback (pressed state)" + - "Ensure the action was performed on the correct accessibility-labeled element" + +metadata: + tags: ["action", "accessibility", "aria", "click", "a11y"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/accordion-001.yaml b/eval-server/nodejs/evals/action-agent/accordion-001.yaml new file mode 100644 index 0000000..dae142d --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/accordion-001.yaml @@ -0,0 +1,46 @@ +# Accordion expansion test +id: "accordion-001" +name: 
"Expand Accordion Section" +description: "Test clicking to expand an accordion panel" +enabled: true + +target: + url: "https://jqueryui.com/accordion/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click to expand the \"Section 2\" accordion panel" + reasoning: "Testing accordion expand/collapse interaction" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the Section 2 accordion header" + - "Successfully clicked to expand the section" + - "Section 2 content became visible" + - "Other sections collapsed appropriately" + - "Accordion animation completed smoothly" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify Section 2 is now expanded and content visible" + - "Check if other accordion sections collapsed" + - "Confirm the expansion animation completed" + - "Ensure Section 2 header shows expanded state" + +metadata: + tags: ["action", "accordion", "expand", "collapse", "ui"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-a11y-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-a11y-001.yaml new file mode 100644 index 0000000..9526551 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-a11y-001.yaml @@ -0,0 +1,46 @@ +# Accessibility action test +id: "action-agent-a11y-001" +name: "Click Using ARIA Label" +description: "Test clicking an element identified primarily by ARIA attributes" +enabled: true + +target: + url: "https://www.w3.org/WAI/ARIA/apg/patterns/button/examples/button/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click the button with aria-label \"Print Page\"" + reasoning: "Testing action selection using accessibility attributes" + 
+validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Used accessibility tree to find elements" + - "Correctly identified element by ARIA label" + - "Successfully clicked the target button" + - "Demonstrated understanding of accessibility attributes" + - "No reliance on visual appearance alone" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the Print Page button was successfully clicked" + - "Check if any print dialog or print preview appeared" + - "Confirm the button showed visual feedback (pressed state)" + - "Ensure the action was performed on the correct accessibility-labeled element" + +metadata: + tags: ["action", "accessibility", "aria", "click", "a11y"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-accordion-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-accordion-001.yaml new file mode 100644 index 0000000..f2df343 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-accordion-001.yaml @@ -0,0 +1,46 @@ +# Accordion expansion test +id: "action-agent-accordion-001" +name: "Expand Accordion Section" +description: "Test clicking to expand an accordion panel" +enabled: true + +target: + url: "https://jqueryui.com/accordion/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click to expand the \"Section 2\" accordion panel" + reasoning: "Testing accordion expand/collapse interaction" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the Section 2 accordion header" + - "Successfully clicked to expand the section" + - "Section 2 content became visible" + - "Other sections collapsed appropriately" + - "Accordion animation completed smoothly" + visual_verification: 
+ enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify Section 2 is now expanded and content visible" + - "Check if other accordion sections collapsed" + - "Confirm the expansion animation completed" + - "Ensure Section 2 header shows expanded state" + +metadata: + tags: ["action", "accordion", "expand", "collapse", "ui"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-autocomplete-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-autocomplete-001.yaml new file mode 100644 index 0000000..c22bfc7 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-autocomplete-001.yaml @@ -0,0 +1,46 @@ +# Autocomplete search test +id: "action-agent-autocomplete-001" +name: "Use Autocomplete Search" +description: "Test typing in autocomplete field and selecting from suggestions" +enabled: true + +target: + url: "https://jqueryui.com/autocomplete/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Type \"Java\" in the autocomplete field and select \"JavaScript\" from suggestions" + reasoning: "Testing autocomplete/typeahead interaction patterns" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the autocomplete input field" + - "Typed \"Java\" to trigger suggestions" + - "Autocomplete dropdown appeared with suggestions" + - "Selected \"JavaScript\" from the suggestion list" + - "Input field shows the selected value" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify \"JavaScript\" appears in the input field" + - "Check if autocomplete suggestions appeared" + - "Confirm the correct suggestion was selected" + - "Ensure dropdown closed after selection" + +metadata: + tags: ["action", "autocomplete", "typeahead", 
"search", "suggestions"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-checkbox-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-checkbox-001.yaml new file mode 100644 index 0000000..b76f307 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-checkbox-001.yaml @@ -0,0 +1,46 @@ +# Checkbox/radio button test +id: "action-agent-checkbox-001" +name: "Toggle Newsletter Checkbox" +description: "Test clicking checkbox elements for form options" +enabled: true + +target: + url: "https://www.w3schools.com/html/tryit.asp?filename=tryhtml_checkbox" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 45000 + +input: + objective: "Click the checkbox labeled \"I have a bike\" to check it" + reasoning: "Testing interaction with checkbox form elements" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Identified the correct checkbox among multiple options" + - "Used click action on the checkbox element" + - "Checkbox state changed from unchecked to checked" + - "Handled the iframe structure if present" + - "No errors with form element interaction" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify the checkbox state changed from unchecked to checked" + - "Confirm the \"I have a bike\" checkbox now shows a checkmark" + - "Verify the checkbox visual indicator (checkmark) is clearly visible" + - "Ensure no other checkboxes were accidentally modified" + +metadata: + tags: ["action", "checkbox", "form", "w3schools", "input"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-checkbox-002.yaml 
b/eval-server/nodejs/evals/action-agent/action-agent-checkbox-002.yaml new file mode 100644 index 0000000..0b25fa8 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-checkbox-002.yaml @@ -0,0 +1,47 @@ +# Toggle checkbox test - using HTML form test site +id: "action-agent-checkbox-002" +name: "Check Extra Cheese Checkbox" +description: "Test checking a specific checkbox using the check method" +enabled: true + +target: + url: "https://httpbin.org/forms/post" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 45000 + +input: + objective: "Find and check the \"Extra Cheese\" checkbox in the Pizza Toppings section" + reasoning: "Testing checkbox interaction functionality using check method" + hint: "Look for the Extra Cheese checkbox and use the check method to select it" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the Extra Cheese checkbox in the Pizza Toppings section" + - "Used the check method instead of click for better reliability" + - "Checkbox became checked (if it wasn't already)" + - "No errors occurred during checkbox interaction" + - "Form maintained its structure after checkbox selection" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the Extra Cheese checkbox is now checked (shows checkmark)" + - "Check that the checkbox shows proper visual feedback for checked state" + - "Confirm the form structure remained intact" + - "Ensure the checkbox for Extra Cheese was specifically targeted and checked" + +metadata: + tags: ["action", "checkbox", "check", "form", "httpbin"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-click-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-click-001.yaml new file mode 100644 index 0000000..e9af6cf --- 
/dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-click-001.yaml @@ -0,0 +1,47 @@ +# Basic search interaction test +id: "action-agent-click-001" +name: "Search with Text Entry and Click" +description: "Test entering text in search field and clicking search button" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 90000 + +input: + objective: "Type \"DevTools automation\" in the search box and then click the \"Google Search\" button" + reasoning: "Testing multi-step interaction: text input followed by button click" + hint: "First fill the search input field, then find and click the search button" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Successfully located the search input field" + - "Entered \"DevTools automation\" text in the search box" + - "Located the Google Search button after entering text" + - "Successfully clicked the search button" + - "Search was executed and results page loaded" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify text \"DevTools automation\" was entered in the search field" + - "Check if search results page loaded with relevant results" + - "Confirm the search was executed (URL changed to results page)" + - "Ensure search results are related to \"DevTools automation\"" + +metadata: + tags: ["action", "multi-step", "search", "form-fill", "click", "google", "basic"] + priority: "high" + timeout: 90000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-context-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-context-001.yaml new file mode 100644 index 0000000..6162697 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-context-001.yaml @@ -0,0 +1,46 @@ +# Right click context menu test +id: 
"action-agent-context-001" +name: "Right Click Context Menu" +description: "Test right-clicking to open context menu" +enabled: true + +target: + url: "https://the-internet.herokuapp.com/context_menu" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Right-click on the context menu area to open the context menu" + reasoning: "Testing right-click context menu interaction" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the designated context menu area" + - "Performed right-click action correctly" + - "Context menu appeared with options" + - "Successfully triggered the right-click event" + - "Alert or confirmation appeared as expected" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify right-click was performed on correct area" + - "Check if context menu or alert appeared" + - "Confirm right-click event was properly triggered" + - "Ensure the expected response occurred" + +metadata: + tags: ["action", "context-menu", "right-click", "mouse", "menu"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-datepicker-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-datepicker-001.yaml new file mode 100644 index 0000000..f4abbf7 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-datepicker-001.yaml @@ -0,0 +1,46 @@ +# Date picker test +id: "action-agent-datepicker-001" +name: "Select Date from Calendar" +description: "Test clicking date input and selecting a specific date from calendar popup" +enabled: true + +target: + url: "https://jqueryui.com/datepicker/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click the date input field and select March 15, 2024 from the 
calendar picker" + reasoning: "Testing interaction with calendar popup widgets" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located and clicked the date input field" + - "Calendar popup opened successfully" + - "Navigated to correct month/year if needed" + - "Selected the specific date (March 15, 2024)" + - "Date input field shows the selected date" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the date input field contains the selected date" + - "Check if the calendar widget opened and closed properly" + - "Confirm the correct date was highlighted and selected" + - "Ensure the date format matches expected output" + +metadata: + tags: ["action", "datepicker", "calendar", "form", "popup"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-daterange-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-daterange-001.yaml new file mode 100644 index 0000000..4581a47 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-daterange-001.yaml @@ -0,0 +1,46 @@ +# Date range picker test +id: "action-agent-daterange-001" +name: "Select Date Range" +description: "Test selecting a date range with start and end dates" +enabled: true + +target: + url: "https://www.daterangepicker.com/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Select a date range from February 1, 2024 to February 28, 2024" + reasoning: "Testing complex date range selection with start and end dates" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Opened the date range picker interface" + - "Selected the start date (February 1, 2024)" + - "Selected the end date (February 28, 2024)" + - "Date range was properly 
applied" + - "Input field shows the complete date range" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify both start and end dates are displayed in the input" + - "Check if the date range picker shows the selected range" + - "Confirm the format matches expected date range display" + - "Ensure both dates were selected in sequence" + +metadata: + tags: ["action", "daterange", "date-picker", "form", "complex"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-dropdown-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-dropdown-001.yaml new file mode 100644 index 0000000..b37b91c --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-dropdown-001.yaml @@ -0,0 +1,46 @@ +# Dropdown selection test +id: "action-agent-dropdown-001" +name: "Select Dropdown Option" +description: "Test selecting an option from a dropdown menu" +enabled: true + +target: + url: "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_select" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 45000 + +input: + objective: "Select \"Audi\" from the car brands dropdown menu" + reasoning: "Testing dropdown selection interaction" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the dropdown/select element" + - "Identified the correct option to select" + - "Successfully selected the Audi option" + - "Dropdown value changed to the selected option" + - "Handled select element interaction properly" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify the dropdown selection changed" + - "Confirm \"Audi\" is now displayed as the selected option" + - "Check if the dropdown is closed after selection" + - "Verify no other 
form elements were affected by the selection" + +metadata: + tags: ["action", "dropdown", "select", "form", "w3schools"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-dynamic-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-dynamic-001.yaml new file mode 100644 index 0000000..a4380f3 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-dynamic-001.yaml @@ -0,0 +1,46 @@ +# Dynamic content interaction test +id: "action-agent-dynamic-001" +name: "Click Dynamic Load Button" +description: "Test clicking a button that loads dynamic content" +enabled: true + +target: + url: "https://the-internet.herokuapp.com/dynamic_loading/1" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 90000 + +input: + objective: "Click the \"Start\" button to trigger dynamic content loading" + reasoning: "Testing interaction with dynamically loaded content" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Found and clicked the Start button" + - "Handled the dynamic loading process" + - "Recognized that content changes after clicking" + - "No timing issues with the dynamic content" + - "Successfully triggered the loading animation" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify dynamic content loaded after clicking Start" + - "Check if loading animation or spinner was displayed" + - "Confirm new content appeared that was previously hidden" + - "Verify the Start button state changed or was replaced after clicking" + +metadata: + tags: ["action", "dynamic", "click", "ajax", "loading"] + priority: "high" + timeout: 90000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git 
a/eval-server/nodejs/evals/action-agent/action-agent-ecommerce-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-ecommerce-001.yaml new file mode 100644 index 0000000..503c157 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-ecommerce-001.yaml @@ -0,0 +1,46 @@ +# E-commerce action test +id: "action-agent-ecommerce-001" +name: "Add Product to Cart" +description: "Test clicking \"Add to Cart\" button on an e-commerce product page" +enabled: true + +target: + url: "https://www.homedepot.com/p/Husky-20-Gal-Professional-Duty-Waterproof-Storage-Container-with-Hinged-Lid-in-Red-249160/313799634" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 180000 + +input: + objective: "Click the \"Add to Cart\" button for this storage container" + reasoning: "Testing e-commerce interaction with product cart functionality" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the Add to Cart button on the product page" + - "Successfully clicked the button" + - "Handled any popups or confirmations that appeared" + - "Verified the item was added (cart count changed or confirmation shown)" + - "Dealt with page dynamics after clicking" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify the Add to Cart button was clicked" + - "Check if cart count indicator increased or shows the item was added" + - "Look for any confirmation popup or notification about the item being added" + - "Verify the button state changed (e.g., to \"Added to Cart\" or disabled)" + +metadata: + tags: ["action", "ecommerce", "click", "homedepot", "cart"] + priority: "high" + timeout: 180000 + retries: 3 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-error-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-error-001.yaml 
new file mode 100644 index 0000000..43c95e6 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-error-001.yaml @@ -0,0 +1,47 @@ +# Error recovery test +id: "action-agent-error-001" +name: "Handle Missing Element" +description: "Test agent behavior when target element is not found" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click the \"Sign Up\" button" + reasoning: "Testing error handling when element does not exist" + hint: "There is no Sign Up button on Google homepage - agent should handle gracefully" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Attempted to find the requested element" + - "Recognized that the element does not exist" + - "Provided clear error message or explanation" + - "Did not crash or produce confusing output" + - "Suggested alternatives or explained the issue" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the page remains in a stable state despite the missing element" + - "Confirm no error dialogs or broken UI elements appeared" + - "Check that the agent handled the missing element gracefully" + - "Ensure the page was properly analyzed even though the target was not found" + +metadata: + tags: ["action", "error-handling", "missing-element", "recovery", "edge-case"] + priority: "high" + timeout: 60000 + retries: 1 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-filter-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-filter-001.yaml new file mode 100644 index 0000000..7782999 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-filter-001.yaml @@ -0,0 +1,46 @@ +# Search filter application test +id: "action-agent-filter-001" +name: "Apply Search Filters" +description: 
"Test applying search filters to modify results" +enabled: true + +target: + url: "https://www.w3schools.com/howto/howto_js_filter_lists.asp" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Type \"Anna\" in the search filter to filter the list" + reasoning: "Testing search filter application" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the search filter input" + - "Typed \"Anna\" in the filter field" + - "List items filtered to show only matching results" + - "Non-matching items were hidden or removed from view" + - "Filter functionality worked as expected" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify search input contains \"Anna\"" + - "Check if list shows only items containing \"Anna\"" + - "Confirm non-matching items are not visible" + - "Ensure filter functionality reduced the visible list items" + +metadata: + tags: ["action", "filter", "search", "list", "dynamic"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-form-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-form-001.yaml new file mode 100644 index 0000000..61d036f --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-form-001.yaml @@ -0,0 +1,46 @@ +# Form fill action test +id: "action-agent-form-001" +name: "Fill Search Query" +description: "Test filling a search input field with specific text" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 45000 + +input: + objective: "Fill the search box with \"Chrome DevTools automation testing\"" + reasoning: "Testing form input capability with a specific search query" + +validation: + type: "llm-judge" + llm_judge: + 
model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Successfully identified the search input field" + - "Used perform_action with fill method" + - "Correctly filled the field with the specified text" + - "Verified the field accepted the input" + - "No formatting or encoding issues with the text" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to confirm text was entered in the search field" + - "Verify the exact text \"Chrome DevTools automation testing\" is visible" + - "Check if search suggestions or autocomplete dropdown appeared" + - "Ensure no input validation errors are shown" + +metadata: + tags: ["action", "form-fill", "input", "google", "basic"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-hover-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-hover-001.yaml new file mode 100644 index 0000000..ed98fbf --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-hover-001.yaml @@ -0,0 +1,46 @@ +# Hover action test +id: "action-agent-hover-001" +name: "Hover to Reveal Menu" +description: "Test hovering over an element to reveal hidden content" +enabled: true + +target: + url: "https://the-internet.herokuapp.com/hovers" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Hover over the first user avatar image to reveal the hidden caption" + reasoning: "Testing hover interaction to reveal dynamic content" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the first user avatar image" + - "Used appropriate hover action method" + - "Successfully triggered the hover state" + - "Hidden caption became visible after hover" + - "Handled mouse interaction correctly" + visual_verification: + enabled: true + 
capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify hover revealed hidden content" + - "Check that caption or overlay appeared over the first avatar" + - "Confirm the hover state is visually active on the image" + - "Verify user information or caption text is now visible" + +metadata: + tags: ["action", "hover", "mouse", "dynamic", "reveal"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-keyboard-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-keyboard-001.yaml new file mode 100644 index 0000000..6bfceac --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-keyboard-001.yaml @@ -0,0 +1,46 @@ +# Keyboard tab navigation test +id: "action-agent-keyboard-001" +name: "Keyboard Tab Navigation" +description: "Test using keyboard navigation to move between elements" +enabled: true + +target: + url: "https://www.w3.org/WAI/ARIA/apg/patterns/menubar/examples/menubar-navigation/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Use Tab key to navigate between menu items and Enter to activate" + reasoning: "Testing keyboard-only navigation patterns" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Successfully used keyboard navigation" + - "Tab key moved focus between menu items" + - "Focus indicators were visible during navigation" + - "Enter key activated the focused menu item" + - "Keyboard navigation followed accessibility standards" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify focus indicators are visible on menu items" + - "Check if keyboard navigation moved focus correctly" + - "Confirm Enter key activated the focused item" + - "Ensure accessibility navigation patterns worked" + +metadata: + 
tags: ["action", "keyboard", "navigation", "accessibility", "focus"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-login-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-login-001.yaml new file mode 100644 index 0000000..1b705ce --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-login-001.yaml @@ -0,0 +1,47 @@ +# Login form test +id: "action-agent-login-001" +name: "Fill Login Credentials" +description: "Test filling username and password fields in a login form" +enabled: true + +target: + url: "https://the-internet.herokuapp.com/login" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Fill the username field with \"tomsmith\" and password field with \"SuperSecretPassword!\"" + reasoning: "Testing form fill with multiple fields including password type" + input_data: "<username>tomsmith</username><password>SuperSecretPassword!</password>" 
+ +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Identified both username and password fields" + - "Filled username field with correct value" + - "Filled password field with correct value" + - "Handled password field type appropriately" + - "Used the provided input_data XML format correctly" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the username field shows \"tomsmith\" entered" + - "Confirm the password field has dots/asterisks indicating password entry" + - "Check that both fields are properly filled before submission" + - "Ensure no validation errors are shown for the filled fields" + +metadata: + tags: ["action", "login", "form-fill", "authentication", "multi-field"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-modal-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-modal-001.yaml new file mode 100644 index 0000000..1324fee --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-modal-001.yaml @@ -0,0 +1,46 @@ +# Modal dialog test +id: "action-agent-modal-001" +name: "Open and Close Modal" +description: "Test opening modal dialog and closing it with X button" +enabled: true + +target: + url: "https://getbootstrap.com/docs/5.0/components/modal/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click to open the modal dialog, then close it using the X button" + reasoning: "Testing modal dialog interaction patterns" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located and clicked the modal trigger button" + - "Modal dialog opened successfully" + - "Modal content was visible and accessible" + - "Found and clicked the close (X) button" + - "Modal closed and page 
returned to normal state" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify modal opened with visible content" + - "Check if modal overlay appeared correctly" + - "Confirm modal was closed after clicking X" + - "Ensure page background is accessible again" + +metadata: + tags: ["action", "modal", "dialog", "popup", "overlay"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-multiselect-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-multiselect-001.yaml new file mode 100644 index 0000000..fed3f78 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-multiselect-001.yaml @@ -0,0 +1,46 @@ +# Multi-select dropdown test +id: "action-agent-multiselect-001" +name: "Select Multiple Options" +description: "Test selecting multiple options from a multi-select dropdown" +enabled: true + +target: + url: "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_select_multiple" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Select both \"Volvo\" and \"Audi\" from the multi-select dropdown" + reasoning: "Testing multiple selection in select elements" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the multi-select dropdown element" + - "Successfully selected Volvo option" + - "Successfully selected Audi option" + - "Both options remain selected simultaneously" + - "Used appropriate multi-select interaction method" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify both Volvo and Audi appear selected" + - "Check if both options are highlighted/marked" + - "Confirm multi-select functionality worked correctly" + - "Ensure no other options were accidentally selected" + +metadata: + 
tags: ["action", "multi-select", "dropdown", "form", "multiple"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-multistep-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-multistep-001.yaml new file mode 100644 index 0000000..31514dd --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-multistep-001.yaml @@ -0,0 +1,47 @@ +# Multi-step form test +id: "action-agent-multistep-001" +name: "Complete Search and Submit" +description: "Test filling a search form and then clicking the submit button" +enabled: true + +target: + url: "https://www.bing.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Fill the search box with \"automated testing tools\" and then click the search button" + reasoning: "Testing multi-step form interaction combining fill and click actions" + hint: "This requires two actions: first fill the search field, then click the search button" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Recognized this requires multiple actions" + - "First filled the search input correctly" + - "Then located and clicked the search button" + - "Both actions completed successfully in sequence" + - "Search was initiated with the correct query" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the search input contains \"automated testing tools\" text" + - "Confirm the search was submitted and results page loaded" + - "Check that search results are related to the query" + - "Ensure the multi-step action completed fully with both fill and click" + +metadata: + tags: ["action", "multi-step", "form-fill", "click", "bing", "search"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at 
end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-nav-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-nav-001.yaml new file mode 100644 index 0000000..f49a0cf --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-nav-001.yaml @@ -0,0 +1,46 @@ +# Complex navigation test +id: "action-agent-nav-001" +name: "Navigate via Menu Click" +description: "Test clicking navigation menu items to navigate between pages" +enabled: true + +target: + url: "https://www.wikipedia.org" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click on the \"English\" language link to navigate to English Wikipedia" + reasoning: "Testing navigation through link clicks on a multilingual site" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Identified the correct language link among many options" + - "Successfully clicked the English link" + - "Navigation occurred to the English Wikipedia" + - "Used appropriate tools to verify navigation success" + - "Handled the multilingual page structure correctly" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify navigation from Wikipedia homepage to English Wikipedia" + - "Check if the page language and content changed to English" + - "Verify the URL changed to en.wikipedia.org" + - "Confirm the English Wikipedia main page is displayed" + +metadata: + tags: ["action", "navigation", "click", "wikipedia", "multilingual"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-radio-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-radio-001.yaml new file mode 100644 index 0000000..07d6ef8 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-radio-001.yaml 
@@ -0,0 +1,47 @@ +# Radio button selection test +id: "action-agent-radio-001" +name: "Select Radio Button Option" +description: "Test selecting a specific radio button option using click method" +enabled: true + +target: + url: "https://httpbin.org/forms/post" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 45000 + +input: + objective: "Select the \"Medium\" pizza size from the Pizza Size radio button group" + reasoning: "Testing radio button selection functionality" + hint: "Look for the Medium radio button in the Pizza Size section and click it to select" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the Medium radio button in the Pizza Size section" + - "Successfully clicked the Medium radio button" + - "Radio button became selected (checked state)" + - "Other radio buttons in the same group became unselected" + - "Form maintained its structure after radio button selection" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the Medium radio button is now selected (shows filled circle)" + - "Check that other pizza size options (Small, Large) are no longer selected" + - "Confirm the form structure remained intact" + - "Ensure the Medium pizza size radio button was specifically targeted" + +metadata: + tags: ["action", "radio", "click", "form", "httpbin"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-slider-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-slider-001.yaml new file mode 100644 index 0000000..c370658 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-slider-001.yaml @@ -0,0 +1,46 @@ +# Range slider test +id: "action-agent-slider-001" +name: "Adjust Range Slider" +description: "Test moving slider to set a specific value" +enabled: 
true + +target: + url: "https://jqueryui.com/slider/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Move the slider to set the value to 75" + reasoning: "Testing slider/range input manipulation" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the slider control element" + - "Successfully moved the slider handle" + - "Set the slider value to approximately 75" + - "Slider position reflects the target value" + - "Any associated display shows the correct value" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify slider handle moved to represent value 75" + - "Check if value display shows 75 or close to it" + - "Confirm slider position visually matches target" + - "Ensure slider interaction was smooth and successful" + +metadata: + tags: ["action", "slider", "range", "form", "drag"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-tableselect-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-tableselect-001.yaml new file mode 100644 index 0000000..d78e66c --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-tableselect-001.yaml @@ -0,0 +1,46 @@ +# Table row selection test +id: "action-agent-tableselect-001" +name: "Select Table Row" +description: "Test clicking to select a table row" +enabled: true + +target: + url: "https://datatables.net/examples/api/select_single_row.html" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click on the first row to select it" + reasoning: "Testing table row selection patterns" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the first table row" + - "Successfully 
clicked the row" + - "Row became highlighted/selected" + - "Selection state is visually apparent" + - "Only one row is selected at a time" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the first row is now highlighted/selected" + - "Check if row selection visual feedback is clear" + - "Confirm only the clicked row is selected" + - "Ensure row selection styling is properly applied" + +metadata: + tags: ["action", "table", "select", "row", "highlight"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-tablesort-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-tablesort-001.yaml new file mode 100644 index 0000000..e3e3176 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-tablesort-001.yaml @@ -0,0 +1,46 @@ +# Table column sorting test +id: "action-agent-tablesort-001" +name: "Sort Table Column" +description: "Test clicking table column header to sort data" +enabled: true + +target: + url: "https://datatables.net/examples/basic_init/zero_configuration.html" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click on the \"Name\" column header to sort the table by name" + reasoning: "Testing table column sorting interaction" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the Name column header" + - "Successfully clicked the column header" + - "Table data reordered by name alphabetically" + - "Sort indicator appeared on the Name column" + - "Table sorting completed without errors" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify table rows are now sorted alphabetically by name" + - "Check if sort arrow/indicator appears on Name column" + - "Confirm the data order 
changed from before to after" + - "Ensure table structure remained intact after sorting" + +metadata: + tags: ["action", "table", "sort", "column", "data"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-tabs-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-tabs-001.yaml new file mode 100644 index 0000000..22db60c --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-tabs-001.yaml @@ -0,0 +1,46 @@ +# Tab panel navigation test +id: "action-agent-tabs-001" +name: "Navigate Tab Panels" +description: "Test clicking tab to switch between tab panels" +enabled: true + +target: + url: "https://jqueryui.com/tabs/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click on the \"Nunc tincidunt\" tab to switch to that panel" + reasoning: "Testing tab panel navigation" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the \"Nunc tincidunt\" tab button" + - "Successfully clicked the tab" + - "Tab panel content switched to the selected tab" + - "Active tab visual state changed appropriately" + - "Content area updated to show the new panel" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the \"Nunc tincidunt\" tab is now active/highlighted" + - "Check if the content panel changed to show new content" + - "Confirm the tab switching animation completed" + - "Ensure the correct tab content is visible" + +metadata: + tags: ["action", "tabs", "navigation", "panels", "ui"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-timepicker-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-timepicker-001.yaml 
new file mode 100644 index 0000000..056fbe9 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-timepicker-001.yaml @@ -0,0 +1,46 @@ +# Time picker test +id: "action-agent-timepicker-001" +name: "Select Time from Picker" +description: "Test setting time using time picker controls" +enabled: true + +target: + url: "https://timepicker.co/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Set the time to 2:30 PM using the time picker controls" + reasoning: "Testing time selection with hour/minute controls" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the time picker interface" + - "Set the hour to 2 (14 for 24-hour format)" + - "Set the minutes to 30" + - "Selected PM or appropriate time format" + - "Time input shows 2:30 PM or equivalent" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the time input displays 2:30 PM or 14:30" + - "Check if hour and minute were set correctly" + - "Confirm AM/PM selection if applicable" + - "Ensure the time picker interface was properly used" + +metadata: + tags: ["action", "timepicker", "time", "form", "clock"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-upload-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-upload-001.yaml new file mode 100644 index 0000000..518515d --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-upload-001.yaml @@ -0,0 +1,46 @@ +# File upload test +id: "action-agent-upload-001" +name: "Upload File via Input" +description: "Test clicking file input and uploading a test file" +enabled: true + +target: + url: "https://the-internet.herokuapp.com/upload" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + 
+input: + objective: "Click the file input and upload a test file" + reasoning: "Testing file upload interaction through input elements" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the file input element" + - "Triggered file selection dialog" + - "Selected a file for upload" + - "File name appears in the input field" + - "Upload process initiated successfully" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify file name appears in the upload input field" + - "Check if file selection was successful" + - "Confirm upload button is available or file is ready" + - "Ensure no upload errors are displayed" + +metadata: + tags: ["action", "upload", "file", "input", "form"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-video-001.yaml b/eval-server/nodejs/evals/action-agent/action-agent-video-001.yaml new file mode 100644 index 0000000..ba21b28 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-video-001.yaml @@ -0,0 +1,47 @@ +# Video playback controls test +id: "action-agent-video-001" +name: "Control Video Playback" +description: "Test starting video playback using click + spacebar" +enabled: true + +target: + url: "https://www.w3schools.com/html/html5_video.asp" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 90000 + +input: + objective: "Click the video element to focus it, then press spacebar to start playback" + reasoning: "Testing video control using standard keyboard interaction (click to focus + spacebar to play)" + hint: "First click the Video element to focus it, then use keyboard input to press the spacebar key to start playback" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the Video 
element in the accessibility tree" + - "Successfully clicked the Video element to focus it" + - "Used keyboard input to press spacebar" + - "Video playback started after spacebar press" + - "No errors occurred during the interaction sequence" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify video player is visible on the page" + - "Check if the play button was clicked (may show pause button after)" + - "Look for visual indicators that video started playing" + - "Ensure no error messages appeared during video interaction" + +metadata: + tags: ["action", "video", "media", "controls", "playback"] + priority: "high" + timeout: 90000 + retries: 3 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/action-agent-video-002.yaml b/eval-server/nodejs/evals/action-agent/action-agent-video-002.yaml new file mode 100644 index 0000000..d7188ec --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/action-agent-video-002.yaml @@ -0,0 +1,47 @@ +# Video play button specific targeting test +id: "action-agent-video-002" +name: "Click Video Play Button Specifically" +description: "Test clicking the specific play button (not the video element)" +enabled: true + +target: + url: "https://www.w3schools.com/html/html5_video.asp" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Find and click the button that has name=\"play\" (not the Video element itself)" + reasoning: "Testing specific targeting of the play button element" + hint: "Target the button element with text or label \"play\", do not click the Video element" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Found a button element (not Video element) with \"play\" in the name" + - "Successfully clicked the play button specifically" + - "Did not click on the Video element itself" + - "Play 
button click was executed correctly" + - "Video responded to the button click" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the play button (not video element) was clicked" + - "Check if video started playing after button click" + - "Confirm the target was the button, not the video container" + - "Look for changes in video player state" + +metadata: + tags: ["action", "video", "button", "specific-targeting"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/autocomplete-001.yaml b/eval-server/nodejs/evals/action-agent/autocomplete-001.yaml new file mode 100644 index 0000000..4bd4aa8 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/autocomplete-001.yaml @@ -0,0 +1,46 @@ +# Autocomplete search test +id: "autocomplete-001" +name: "Use Autocomplete Search" +description: "Test typing in autocomplete field and selecting from suggestions" +enabled: true + +target: + url: "https://jqueryui.com/autocomplete/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Type \"Java\" in the autocomplete field and select \"JavaScript\" from suggestions" + reasoning: "Testing autocomplete/typeahead interaction patterns" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the autocomplete input field" + - "Typed \"Java\" to trigger suggestions" + - "Autocomplete dropdown appeared with suggestions" + - "Selected \"JavaScript\" from the suggestion list" + - "Input field shows the selected value" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify \"JavaScript\" appears in the input field" + - "Check if autocomplete suggestions appeared" + - "Confirm the correct suggestion was selected" + - "Ensure dropdown closed after 
selection" + +metadata: + tags: ["action", "autocomplete", "typeahead", "search", "suggestions"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/checkbox-001.yaml b/eval-server/nodejs/evals/action-agent/checkbox-001.yaml new file mode 100644 index 0000000..041f2f6 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/checkbox-001.yaml @@ -0,0 +1,46 @@ +# Checkbox/radio button test +id: "checkbox-001" +name: "Toggle Bike Ownership Checkbox" +description: "Test clicking checkbox elements for form options" +enabled: true + +target: + url: "https://www.w3schools.com/html/tryit.asp?filename=tryhtml_checkbox" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 45000 + +input: + objective: "Click the checkbox labeled \"I have a bike\" to check it" + reasoning: "Testing interaction with checkbox form elements" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Identified the correct checkbox among multiple options" + - "Used click action on the checkbox element" + - "Checkbox state changed from unchecked to checked" + - "Handled the iframe structure if present" + - "No errors with form element interaction" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify the checkbox state changed from unchecked to checked" + - "Confirm the \"I have a bike\" checkbox now shows a checkmark" + - "Verify the checkbox visual indicator (checkmark) is clearly visible" + - "Ensure no other checkboxes were accidentally modified" + +metadata: + tags: ["action", "checkbox", "form", "w3schools", "input"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/checkbox-002.yaml
b/eval-server/nodejs/evals/action-agent/checkbox-002.yaml new file mode 100644 index 0000000..036f388 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/checkbox-002.yaml @@ -0,0 +1,47 @@ +# Toggle checkbox test - using HTML form test site +id: "checkbox-002" +name: "Check Extra Cheese Checkbox" +description: "Test checking a specific checkbox using the check method" +enabled: true + +target: + url: "https://httpbin.org/forms/post" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 45000 + +input: + objective: "Find and check the \"Extra Cheese\" checkbox in the Pizza Toppings section" + reasoning: "Testing checkbox interaction functionality using check method" + hint: "Look for the Extra Cheese checkbox and use the check method to select it" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the Extra Cheese checkbox in the Pizza Toppings section" + - "Used the check method instead of click for better reliability" + - "Checkbox became checked (if it wasn't already)" + - "No errors occurred during checkbox interaction" + - "Form maintained its structure after checkbox selection" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the Extra Cheese checkbox is now checked (shows checkmark)" + - "Check that the checkbox shows proper visual feedback for checked state" + - "Confirm the form structure remained intact" + - "Ensure the checkbox for Extra Cheese was specifically targeted and checked" + +metadata: + tags: ["action", "checkbox", "check", "form", "httpbin"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/click-001.yaml b/eval-server/nodejs/evals/action-agent/click-001.yaml new file mode 100644 index 0000000..e86c8fd --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/click-001.yaml 
@@ -0,0 +1,47 @@ +# Basic search interaction test +id: "click-001" +name: "Search with Text Entry and Click" +description: "Test entering text in search field and clicking search button" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 90000 + +input: + objective: "Type \"DevTools automation\" in the search box and then click the \"Google Search\" button" + reasoning: "Testing multi-step interaction: text input followed by button click" + hint: "First fill the search input field, then find and click the search button" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Successfully located the search input field" + - "Entered \"DevTools automation\" text in the search box" + - "Located the Google Search button after entering text" + - "Successfully clicked the search button" + - "Search was executed and results page loaded" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify text \"DevTools automation\" was entered in the search field" + - "Check if search results page loaded with relevant results" + - "Confirm the search was executed (URL changed to results page)" + - "Ensure search results are related to \"DevTools automation\"" + +metadata: + tags: ["action", "multi-step", "search", "form-fill", "click", "google", "basic"] + priority: "high" + timeout: 90000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/context-001.yaml b/eval-server/nodejs/evals/action-agent/context-001.yaml new file mode 100644 index 0000000..0ca7c58 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/context-001.yaml @@ -0,0 +1,46 @@ +# Right click context menu test +id: "context-001" +name: "Right Click Context Menu" +description: "Test right-clicking to open context menu" +enabled: true + +target: + 
url: "https://the-internet.herokuapp.com/context_menu" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Right-click on the context menu area to open the context menu" + reasoning: "Testing right-click context menu interaction" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the designated context menu area" + - "Performed right-click action correctly" + - "Context menu appeared with options" + - "Successfully triggered the right-click event" + - "Alert or confirmation appeared as expected" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify right-click was performed on correct area" + - "Check if context menu or alert appeared" + - "Confirm right-click event was properly triggered" + - "Ensure the expected response occurred" + +metadata: + tags: ["action", "context-menu", "right-click", "mouse", "menu"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/datepicker-001.yaml b/eval-server/nodejs/evals/action-agent/datepicker-001.yaml new file mode 100644 index 0000000..9b6a9df --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/datepicker-001.yaml @@ -0,0 +1,46 @@ +# Date picker test +id: "datepicker-001" +name: "Select Date from Calendar" +description: "Test clicking date input and selecting a specific date from calendar popup" +enabled: true + +target: + url: "https://jqueryui.com/datepicker/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click the date input field and select March 15, 2024 from the calendar picker" + reasoning: "Testing interaction with calendar popup widgets" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located and 
clicked the date input field" + - "Calendar popup opened successfully" + - "Navigated to correct month/year if needed" + - "Selected the specific date (March 15, 2024)" + - "Date input field shows the selected date" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the date input field contains the selected date" + - "Check if the calendar widget opened and closed properly" + - "Confirm the correct date was highlighted and selected" + - "Ensure the date format matches expected output" + +metadata: + tags: ["action", "datepicker", "calendar", "form", "popup"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/daterange-001.yaml b/eval-server/nodejs/evals/action-agent/daterange-001.yaml new file mode 100644 index 0000000..a9b202b --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/daterange-001.yaml @@ -0,0 +1,46 @@ +# Date range picker test +id: "daterange-001" +name: "Select Date Range" +description: "Test selecting a date range with start and end dates" +enabled: true + +target: + url: "https://www.daterangepicker.com/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Select a date range from February 1, 2024 to February 28, 2024" + reasoning: "Testing complex date range selection with start and end dates" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Opened the date range picker interface" + - "Selected the start date (February 1, 2024)" + - "Selected the end date (February 28, 2024)" + - "Date range was properly applied" + - "Input field shows the complete date range" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify both start and end dates are displayed in the input" + - "Check if the date range picker 
shows the selected range" + - "Confirm the format matches expected date range display" + - "Ensure both dates were selected in sequence" + +metadata: + tags: ["action", "daterange", "date-picker", "form", "complex"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/dropdown-001.yaml b/eval-server/nodejs/evals/action-agent/dropdown-001.yaml new file mode 100644 index 0000000..a64edb0 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/dropdown-001.yaml @@ -0,0 +1,46 @@ +# Dropdown selection test +id: "dropdown-001" +name: "Select Dropdown Option" +description: "Test selecting an option from a dropdown menu" +enabled: true + +target: + url: "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_select" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 45000 + +input: + objective: "Select \"Audi\" from the car brands dropdown menu" + reasoning: "Testing dropdown selection interaction" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the dropdown/select element" + - "Identified the correct option to select" + - "Successfully selected the Audi option" + - "Dropdown value changed to the selected option" + - "Handled select element interaction properly" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify the dropdown selection changed" + - "Confirm \"Audi\" is now displayed as the selected option" + - "Check if the dropdown is closed after selection" + - "Verify no other form elements were affected by the selection" + +metadata: + tags: ["action", "dropdown", "select", "form", "w3schools"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/dynamic-001.yaml 
b/eval-server/nodejs/evals/action-agent/dynamic-001.yaml new file mode 100644 index 0000000..fba60bd --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/dynamic-001.yaml @@ -0,0 +1,46 @@ +# Dynamic content interaction test +id: "dynamic-001" +name: "Click Dynamic Load Button" +description: "Test clicking a button that loads dynamic content" +enabled: true + +target: + url: "https://the-internet.herokuapp.com/dynamic_loading/1" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 90000 + +input: + objective: "Click the \"Start\" button to trigger dynamic content loading" + reasoning: "Testing interaction with dynamically loaded content" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Found and clicked the Start button" + - "Handled the dynamic loading process" + - "Recognized that content changes after clicking" + - "No timing issues with the dynamic content" + - "Successfully triggered the loading animation" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify dynamic content loaded after clicking Start" + - "Check if loading animation or spinner was displayed" + - "Confirm new content appeared that was previously hidden" + - "Verify the Start button state changed or was replaced after clicking" + +metadata: + tags: ["action", "dynamic", "click", "ajax", "loading"] + priority: "high" + timeout: 90000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/ecommerce-001.yaml b/eval-server/nodejs/evals/action-agent/ecommerce-001.yaml new file mode 100644 index 0000000..ae573de --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/ecommerce-001.yaml @@ -0,0 +1,46 @@ +# E-commerce action test +id: "ecommerce-001" +name: "Add Product to Cart" +description: "Test clicking \"Add to Cart\" button on an e-commerce product page" 
+enabled: true + +target: + url: "https://www.homedepot.com/p/Husky-20-Gal-Professional-Duty-Waterproof-Storage-Container-with-Hinged-Lid-in-Red-249160/313799634" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 180000 + +input: + objective: "Click the \"Add to Cart\" button for this storage container" + reasoning: "Testing e-commerce interaction with product cart functionality" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the Add to Cart button on the product page" + - "Successfully clicked the button" + - "Handled any popups or confirmations that appeared" + - "Verified the item was added (cart count changed or confirmation shown)" + - "Dealt with page dynamics after clicking" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify the Add to Cart button was clicked" + - "Check if cart count indicator increased or shows the item was added" + - "Look for any confirmation popup or notification about the item being added" + - "Verify the button state changed (e.g., to \"Added to Cart\" or disabled)" + +metadata: + tags: ["action", "ecommerce", "click", "homedepot", "cart"] + priority: "high" + timeout: 180000 + retries: 3 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/error-001.yaml b/eval-server/nodejs/evals/action-agent/error-001.yaml new file mode 100644 index 0000000..a2b5646 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/error-001.yaml @@ -0,0 +1,47 @@ +# Error recovery test +id: "error-001" +name: "Handle Missing Element" +description: "Test agent behavior when target element is not found" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click the \"Sign Up\" button" + reasoning: "Testing 
error handling when element does not exist" + hint: "There is no Sign Up button on Google homepage - agent should handle gracefully" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Attempted to find the requested element" + - "Recognized that the element does not exist" + - "Provided clear error message or explanation" + - "Did not crash or produce confusing output" + - "Suggested alternatives or explained the issue" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the page remains in a stable state despite the missing element" + - "Confirm no error dialogs or broken UI elements appeared" + - "Check that the agent handled the missing element gracefully" + - "Ensure the page was properly analyzed even though the target was not found" + +metadata: + tags: ["action", "error-handling", "missing-element", "recovery", "edge-case"] + priority: "high" + timeout: 60000 + retries: 1 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/filter-001.yaml b/eval-server/nodejs/evals/action-agent/filter-001.yaml new file mode 100644 index 0000000..7efa8f1 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/filter-001.yaml @@ -0,0 +1,46 @@ +# Search filter application test +id: "filter-001" +name: "Apply Search Filters" +description: "Test applying search filters to modify results" +enabled: true + +target: + url: "https://www.w3schools.com/howto/howto_js_filter_lists.asp" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Type \"Anna\" in the search filter to filter the list" + reasoning: "Testing search filter application" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the search filter input" + - "Typed \"Anna\" in the filter field" + - "List items filtered to show 
only matching results" + - "Non-matching items were hidden or removed from view" + - "Filter functionality worked as expected" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify search input contains \"Anna\"" + - "Check if list shows only items containing \"Anna\"" + - "Confirm non-matching items are not visible" + - "Ensure filter functionality reduced the visible list items" + +metadata: + tags: ["action", "filter", "search", "list", "dynamic"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/form-001.yaml b/eval-server/nodejs/evals/action-agent/form-001.yaml new file mode 100644 index 0000000..c4f06da --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/form-001.yaml @@ -0,0 +1,46 @@ +# Form fill action test +id: "form-001" +name: "Fill Search Query" +description: "Test filling a search input field with specific text" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 45000 + +input: + objective: "Fill the search box with \"Chrome DevTools automation testing\"" + reasoning: "Testing form input capability with a specific search query" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Successfully identified the search input field" + - "Used perform_action with fill method" + - "Correctly filled the field with the specified text" + - "Verified the field accepted the input" + - "No formatting or encoding issues with the text" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to confirm text was entered in the search field" + - "Verify the exact text \"Chrome DevTools automation testing\" is visible" + - "Check if search suggestions or autocomplete dropdown appeared" + - 
"Ensure no input validation errors are shown" + +metadata: + tags: ["action", "form-fill", "input", "google", "basic"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/hover-001.yaml b/eval-server/nodejs/evals/action-agent/hover-001.yaml new file mode 100644 index 0000000..a58b225 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/hover-001.yaml @@ -0,0 +1,46 @@ +# Hover action test +id: "hover-001" +name: "Hover to Reveal Menu" +description: "Test hovering over an element to reveal hidden content" +enabled: true + +target: + url: "https://the-internet.herokuapp.com/hovers" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Hover over the first user avatar image to reveal the hidden caption" + reasoning: "Testing hover interaction to reveal dynamic content" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the first user avatar image" + - "Used appropriate hover action method" + - "Successfully triggered the hover state" + - "Hidden caption became visible after hover" + - "Handled mouse interaction correctly" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify hover revealed hidden content" + - "Check that caption or overlay appeared over the first avatar" + - "Confirm the hover state is visually active on the image" + - "Verify user information or caption text is now visible" + +metadata: + tags: ["action", "hover", "mouse", "dynamic", "reveal"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/keyboard-001.yaml b/eval-server/nodejs/evals/action-agent/keyboard-001.yaml new file mode 100644 index 0000000..6a1ffd1 --- 
/dev/null +++ b/eval-server/nodejs/evals/action-agent/keyboard-001.yaml @@ -0,0 +1,46 @@ +# Keyboard tab navigation test +id: "keyboard-001" +name: "Keyboard Tab Navigation" +description: "Test using keyboard navigation to move between elements" +enabled: true + +target: + url: "https://www.w3.org/WAI/ARIA/apg/patterns/menubar/examples/menubar-navigation/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Use Tab key to navigate between menu items and Enter to activate" + reasoning: "Testing keyboard-only navigation patterns" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Successfully used keyboard navigation" + - "Tab key moved focus between menu items" + - "Focus indicators were visible during navigation" + - "Enter key activated the focused menu item" + - "Keyboard navigation followed accessibility standards" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify focus indicators are visible on menu items" + - "Check if keyboard navigation moved focus correctly" + - "Confirm Enter key activated the focused item" + - "Ensure accessibility navigation patterns worked" + +metadata: + tags: ["action", "keyboard", "navigation", "accessibility", "focus"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/login-001.yaml b/eval-server/nodejs/evals/action-agent/login-001.yaml new file mode 100644 index 0000000..b56fbca --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/login-001.yaml @@ -0,0 +1,47 @@ +# Login form test +id: "login-001" +name: "Fill Login Credentials" +description: "Test filling username and password fields in a login form" +enabled: true + +target: + url: "https://the-internet.herokuapp.com/login" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: 
"action_agent" +timeout: 60000 + +input: + objective: "Fill the username field with \"tomsmith\" and password field with \"SuperSecretPassword!\"" + reasoning: "Testing form fill with multiple fields including password type" + input_data: "tomsmithSuperSecretPassword!" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Identified both username and password fields" + - "Filled username field with correct value" + - "Filled password field with correct value" + - "Handled password field type appropriately" + - "Used the provided input_data XML format correctly" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the username field shows \"tomsmith\" entered" + - "Confirm the password field has dots/asterisks indicating password entry" + - "Check that both fields are properly filled before submission" + - "Ensure no validation errors are shown for the filled fields" + +metadata: + tags: ["action", "login", "form-fill", "authentication", "multi-field"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/modal-001.yaml b/eval-server/nodejs/evals/action-agent/modal-001.yaml new file mode 100644 index 0000000..ef05d16 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/modal-001.yaml @@ -0,0 +1,46 @@ +# Modal dialog test +id: "modal-001" +name: "Open and Close Modal" +description: "Test opening modal dialog and closing it with X button" +enabled: true + +target: + url: "https://getbootstrap.com/docs/5.0/components/modal/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click to open the modal dialog, then close it using the X button" + reasoning: "Testing modal dialog interaction patterns" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + 
criteria: + - "Located and clicked the modal trigger button" + - "Modal dialog opened successfully" + - "Modal content was visible and accessible" + - "Found and clicked the close (X) button" + - "Modal closed and page returned to normal state" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify modal opened with visible content" + - "Check if modal overlay appeared correctly" + - "Confirm modal was closed after clicking X" + - "Ensure page background is accessible again" + +metadata: + tags: ["action", "modal", "dialog", "popup", "overlay"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/multiselect-001.yaml b/eval-server/nodejs/evals/action-agent/multiselect-001.yaml new file mode 100644 index 0000000..a456c9b --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/multiselect-001.yaml @@ -0,0 +1,46 @@ +# Multi-select dropdown test +id: "multiselect-001" +name: "Select Multiple Options" +description: "Test selecting multiple options from a multi-select dropdown" +enabled: true + +target: + url: "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_select_multiple" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Select both \"Volvo\" and \"Audi\" from the multi-select dropdown" + reasoning: "Testing multiple selection in select elements" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the multi-select dropdown element" + - "Successfully selected Volvo option" + - "Successfully selected Audi option" + - "Both options remain selected simultaneously" + - "Used appropriate multi-select interaction method" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify both Volvo and Audi appear selected" + - "Check if 
both options are highlighted/marked" + - "Confirm multi-select functionality worked correctly" + - "Ensure no other options were accidentally selected" + +metadata: + tags: ["action", "multi-select", "dropdown", "form", "multiple"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/multistep-001.yaml b/eval-server/nodejs/evals/action-agent/multistep-001.yaml new file mode 100644 index 0000000..14923a2 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/multistep-001.yaml @@ -0,0 +1,47 @@ +# Multi-step form test +id: "multistep-001" +name: "Complete Search and Submit" +description: "Test filling a search form and then clicking the submit button" +enabled: true + +target: + url: "https://www.bing.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Fill the search box with \"automated testing tools\" and then click the search button" + reasoning: "Testing multi-step form interaction combining fill and click actions" + hint: "This requires two actions: first fill the search field, then click the search button" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Recognized this requires multiple actions" + - "First filled the search input correctly" + - "Then located and clicked the search button" + - "Both actions completed successfully in sequence" + - "Search was initiated with the correct query" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the search input contains \"automated testing tools\" text" + - "Confirm the search was submitted and results page loaded" + - "Check that search results are related to the query" + - "Ensure the multi-step action completed fully with both fill and click" + +metadata: + tags: ["action", "multi-step", "form-fill", "click", "bing", 
"search"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/nav-001.yaml b/eval-server/nodejs/evals/action-agent/nav-001.yaml new file mode 100644 index 0000000..e1ef610 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/nav-001.yaml @@ -0,0 +1,46 @@ +# Complex navigation test +id: "nav-001" +name: "Navigate via Menu Click" +description: "Test clicking navigation menu items to navigate between pages" +enabled: true + +target: + url: "https://www.wikipedia.org" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click on the \"English\" language link to navigate to English Wikipedia" + reasoning: "Testing navigation through link clicks on a multilingual site" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Identified the correct language link among many options" + - "Successfully clicked the English link" + - "Navigation occurred to the English Wikipedia" + - "Used appropriate tools to verify navigation success" + - "Handled the multilingual page structure correctly" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify navigation from Wikipedia homepage to English Wikipedia" + - "Check if the page language and content changed to English" + - "Verify the URL changed to en.wikipedia.org" + - "Confirm the English Wikipedia main page is displayed" + +metadata: + tags: ["action", "navigation", "click", "wikipedia", "multilingual"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/radio-001.yaml b/eval-server/nodejs/evals/action-agent/radio-001.yaml new file mode 100644 index 0000000..a136e1e --- /dev/null +++ 
b/eval-server/nodejs/evals/action-agent/radio-001.yaml @@ -0,0 +1,47 @@ +# Radio button selection test +id: "radio-001" +name: "Select Radio Button Option" +description: "Test selecting a specific radio button option using click method" +enabled: true + +target: + url: "https://httpbin.org/forms/post" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 45000 + +input: + objective: "Select the \"Medium\" pizza size from the Pizza Size radio button group" + reasoning: "Testing radio button selection functionality" + hint: "Look for the Medium radio button in the Pizza Size section and click it to select" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the Medium radio button in the Pizza Size section" + - "Successfully clicked the Medium radio button" + - "Radio button became selected (checked state)" + - "Other radio buttons in the same group became unselected" + - "Form maintained its structure after radio button selection" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the Medium radio button is now selected (shows filled circle)" + - "Check that other pizza size options (Small, Large) are no longer selected" + - "Confirm the form structure remained intact" + - "Ensure the Medium pizza size radio button was specifically targeted" + +metadata: + tags: ["action", "radio", "click", "form", "httpbin"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/slider-001.yaml b/eval-server/nodejs/evals/action-agent/slider-001.yaml new file mode 100644 index 0000000..9369671 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/slider-001.yaml @@ -0,0 +1,46 @@ +# Range slider test +id: "slider-001" +name: "Adjust Range Slider" +description: "Test moving slider to set a specific value" +enabled: true + 
+target: + url: "https://jqueryui.com/slider/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Move the slider to set the value to 75" + reasoning: "Testing slider/range input manipulation" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the slider control element" + - "Successfully moved the slider handle" + - "Set the slider value to approximately 75" + - "Slider position reflects the target value" + - "Any associated display shows the correct value" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify slider handle moved to represent value 75" + - "Check if value display shows 75 or close to it" + - "Confirm slider position visually matches target" + - "Ensure slider interaction was smooth and successful" + +metadata: + tags: ["action", "slider", "range", "form", "drag"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/tableselect-001.yaml b/eval-server/nodejs/evals/action-agent/tableselect-001.yaml new file mode 100644 index 0000000..b38341e --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/tableselect-001.yaml @@ -0,0 +1,46 @@ +# Table row selection test +id: "tableselect-001" +name: "Select Table Row" +description: "Test clicking to select a table row" +enabled: true + +target: + url: "https://datatables.net/examples/api/select_single_row.html" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click on the first row to select it" + reasoning: "Testing table row selection patterns" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the first table row" + - "Successfully clicked the row" + - "Row became highlighted/selected" + - 
"Selection state is visually apparent" + - "Only one row is selected at a time" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the first row is now highlighted/selected" + - "Check if row selection visual feedback is clear" + - "Confirm only the clicked row is selected" + - "Ensure row selection styling is properly applied" + +metadata: + tags: ["action", "table", "select", "row", "highlight"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/tablesort-001.yaml b/eval-server/nodejs/evals/action-agent/tablesort-001.yaml new file mode 100644 index 0000000..32695c7 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/tablesort-001.yaml @@ -0,0 +1,46 @@ +# Table column sorting test +id: "tablesort-001" +name: "Sort Table Column" +description: "Test clicking table column header to sort data" +enabled: true + +target: + url: "https://datatables.net/examples/basic_init/zero_configuration.html" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click on the \"Name\" column header to sort the table by name" + reasoning: "Testing table column sorting interaction" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the Name column header" + - "Successfully clicked the column header" + - "Table data reordered by name alphabetically" + - "Sort indicator appeared on the Name column" + - "Table sorting completed without errors" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify table rows are now sorted alphabetically by name" + - "Check if sort arrow/indicator appears on Name column" + - "Confirm the data order changed from before to after" + - "Ensure table structure remained intact after sorting" + +metadata: + tags: 
["action", "table", "sort", "column", "data"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/tabs-001.yaml b/eval-server/nodejs/evals/action-agent/tabs-001.yaml new file mode 100644 index 0000000..1079266 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/tabs-001.yaml @@ -0,0 +1,46 @@ +# Tab panel navigation test +id: "tabs-001" +name: "Navigate Tab Panels" +description: "Test clicking tab to switch between tab panels" +enabled: true + +target: + url: "https://jqueryui.com/tabs/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click on the \"Nunc tincidunt\" tab to switch to that panel" + reasoning: "Testing tab panel navigation" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the \"Nunc tincidunt\" tab button" + - "Successfully clicked the tab" + - "Tab panel content switched to the selected tab" + - "Active tab visual state changed appropriately" + - "Content area updated to show the new panel" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the \"Nunc tincidunt\" tab is now active/highlighted" + - "Check if the content panel changed to show new content" + - "Confirm the tab switching animation completed" + - "Ensure the correct tab content is visible" + +metadata: + tags: ["action", "tabs", "navigation", "panels", "ui"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/timepicker-001.yaml b/eval-server/nodejs/evals/action-agent/timepicker-001.yaml new file mode 100644 index 0000000..cbc5742 --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/timepicker-001.yaml @@ -0,0 +1,46 @@ +# Time picker test +id: "timepicker-001" +name: 
"Select Time from Picker" +description: "Test setting time using time picker controls" +enabled: true + +target: + url: "https://timepicker.co/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Set the time to 2:30 PM using the time picker controls" + reasoning: "Testing time selection with hour/minute controls" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the time picker interface" + - "Set the hour to 2 (14 for 24-hour format)" + - "Set the minutes to 30" + - "Selected PM or appropriate time format" + - "Time input shows 2:30 PM or equivalent" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the time input displays 2:30 PM or 14:30" + - "Check if hour and minute were set correctly" + - "Confirm AM/PM selection if applicable" + - "Ensure the time picker interface was properly used" + +metadata: + tags: ["action", "timepicker", "time", "form", "clock"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/upload-001.yaml b/eval-server/nodejs/evals/action-agent/upload-001.yaml new file mode 100644 index 0000000..d5c276c --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/upload-001.yaml @@ -0,0 +1,46 @@ +# File upload test +id: "upload-001" +name: "Upload File via Input" +description: "Test clicking file input and uploading a test file" +enabled: true + +target: + url: "https://the-internet.herokuapp.com/upload" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Click the file input and upload a test file" + reasoning: "Testing file upload interaction through input elements" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the file 
input element" + - "Triggered file selection dialog" + - "Selected a file for upload" + - "File name appears in the input field" + - "Upload process initiated successfully" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify file name appears in the upload input field" + - "Check if file selection was successful" + - "Confirm upload button is available or file is ready" + - "Ensure no upload errors are displayed" + +metadata: + tags: ["action", "upload", "file", "input", "form"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/video-001.yaml b/eval-server/nodejs/evals/action-agent/video-001.yaml new file mode 100644 index 0000000..17c76be --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/video-001.yaml @@ -0,0 +1,47 @@ +# Video playback controls test +id: "video-001" +name: "Control Video Playback" +description: "Test starting video playback using click + spacebar" +enabled: true + +target: + url: "https://www.w3schools.com/html/html5_video.asp" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 90000 + +input: + objective: "Click the video element to focus it, then press spacebar to start playback" + reasoning: "Testing video control using standard keyboard interaction (click to focus + spacebar to play)" + hint: "First click the Video element to focus it, then use keyboard input to press the spacebar key to start playback" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Located the Video element in the accessibility tree" + - "Successfully clicked the Video element to focus it" + - "Used keyboard input to press spacebar" + - "Video playback started after spacebar press" + - "No errors occurred during the interaction sequence" + visual_verification: + enabled: true + capture_before: true + 
capture_after: true + prompts: + - "Verify video player is visible on the page" + - "Check if the play button was clicked (may show pause button after)" + - "Look for visual indicators that video started playing" + - "Ensure no error messages appeared during video interaction" + +metadata: + tags: ["action", "video", "media", "controls", "playback"] + priority: "high" + timeout: 90000 + retries: 3 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/action-agent/video-002.yaml b/eval-server/nodejs/evals/action-agent/video-002.yaml new file mode 100644 index 0000000..b20014c --- /dev/null +++ b/eval-server/nodejs/evals/action-agent/video-002.yaml @@ -0,0 +1,47 @@ +# Video play button specific targeting test +id: "video-002" +name: "Click Video Play Button Specifically" +description: "Test clicking the specific play button (not the video element)" +enabled: true + +target: + url: "https://www.w3schools.com/html/html5_video.asp" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Find and click the button that has name=\"play\" (not the Video element itself)" + reasoning: "Testing specific targeting of the play button element" + hint: "Target the button element with text or label \"play\", do not click the Video element" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Found a button element (not Video element) with \"play\" in the name" + - "Successfully clicked the play button specifically" + - "Did not click on the Video element itself" + - "Play button click was executed correctly" + - "Video responded to the button click" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Verify the play button (not video element) was clicked" + - "Check if video started playing after button click" + - "Confirm the target was the button, not the video container" + 
- "Look for changes in video player state" + +metadata: + tags: ["action", "video", "button", "specific-targeting"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/config.yaml b/eval-server/nodejs/evals/config.yaml new file mode 100644 index 0000000..3968421 --- /dev/null +++ b/eval-server/nodejs/evals/config.yaml @@ -0,0 +1,11 @@ +# model: +# main_model: "deepseek-r1:14b" +# mini_model: "deepseek-r1:14b" +# nano_model: "deepseek-r1:14b" +# provider: "litellm" + +model: + main_model: "gpt-4.1" + mini_model: "gpt-4.1-mini" + nano_model: "gpt-4.1-nano" + provider: "openai" \ No newline at end of file diff --git a/eval-server/nodejs/evals/end-to-end/b-vitamins-research-001.yaml b/eval-server/nodejs/evals/end-to-end/b-vitamins-research-001.yaml new file mode 100644 index 0000000..746ead6 --- /dev/null +++ b/eval-server/nodejs/evals/end-to-end/b-vitamins-research-001.yaml @@ -0,0 +1,35 @@ +# B-Vitamins Research - End-to-End Test +id: "vitamins-research-001" +name: "B-Vitamins Supplementation Research" +description: "End-to-end test for comprehensive B-vitamins research using chat interface" +enabled: true + +tool: "chat" +timeout: 600000 + +input: + message: "Research everything on the supplementation of B-vitamins for adults. 
I need: types of vitamins, available forms and their advantages, dosage and safety" + reasoning: "End-to-end test validating complete user workflow with dynamic tool usage for health research" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Covers all B-vitamin types (B1, B2, B3, B5, B6, B7, B9, B12) comprehensively" + - "Explains different forms of each vitamin and their advantages" + - "Provides appropriate dosage recommendations for adults" + - "Discusses safety considerations and potential side effects" + - "Information is accurate and from reliable health sources" + - "Response is well-organized and easy to understand" + - "Demonstrates intelligent tool selection for health research" + - "Shows complete workflow from request to comprehensive result" + +metadata: + tags: ["end-to-end", "chat", "health", "vitamins", "research", "user-workflow"] + priority: "medium" + timeout: 300000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/end-to-end/investment-research-001.yaml b/eval-server/nodejs/evals/end-to-end/investment-research-001.yaml new file mode 100644 index 0000000..72014df --- /dev/null +++ b/eval-server/nodejs/evals/end-to-end/investment-research-001.yaml @@ -0,0 +1,35 @@ +# Renewable Energy Stocks Research - End-to-End Test +id: "investment-research-001" +name: "Renewable Energy Stocks Research" +description: "End-to-end test for investment research using chat interface" +enabled: true + +tool: "chat" +timeout: 600000 + +input: + message: "Research renewable energy stocks for potential investment. Focus on solar and wind companies with market cap over $1B." 
+ reasoning: "End-to-end test validating financial research workflow with dynamic tool usage" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Identifies specific solar and wind energy companies" + - "Confirms companies have market cap over $1 billion" + - "Provides relevant financial metrics and data" + - "Includes business descriptions and growth prospects" + - "Discusses investment considerations and risks" + - "Information appears current and from reliable sources" + - "Demonstrates intelligent financial research tool usage" + - "Shows complete workflow from request to investment analysis" + +metadata: + tags: ["end-to-end", "chat", "investment", "stocks", "renewable-energy", "financial", "user-workflow"] + priority: "medium" + timeout: 300000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/end-to-end/product-comparison-001.yaml b/eval-server/nodejs/evals/end-to-end/product-comparison-001.yaml new file mode 100644 index 0000000..1363a09 --- /dev/null +++ b/eval-server/nodejs/evals/end-to-end/product-comparison-001.yaml @@ -0,0 +1,40 @@ +# Headphones Comparison - End-to-End Test +id: "product-comparison-001" +name: "Noise-Canceling Headphones Comparison" +description: "End-to-end test for product research and comparison using chat interface" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "chat" +timeout: 300000 + +input: + message: "Compare the top 3 noise-canceling headphones under $300. Include features, pros/cons, and where to buy them." 
+ reasoning: "End-to-end test validating product comparison workflow with dynamic tool usage" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Identifies 3 specific noise-canceling headphones under $300" + - "Provides detailed feature comparison for each model" + - "Lists pros and cons for each headphone clearly" + - "Includes pricing information and purchase locations" + - "Comparison is fair and based on objective criteria" + - "Information appears current and accurate" + - "Demonstrates intelligent research and extraction tool usage" + - "Shows complete workflow from request to actionable buying guide" + +metadata: + tags: ["end-to-end", "chat", "product", "comparison", "headphones", "shopping", "user-workflow"] + priority: "medium" + timeout: 300000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/end-to-end/recipe-nutrition-001.yaml b/eval-server/nodejs/evals/end-to-end/recipe-nutrition-001.yaml new file mode 100644 index 0000000..ef8b0f0 --- /dev/null +++ b/eval-server/nodejs/evals/end-to-end/recipe-nutrition-001.yaml @@ -0,0 +1,40 @@ +# Healthy Recipe Search - End-to-End Test +id: "recipe-nutrition-001" +name: "Healthy Family Dinner Recipes" +description: "End-to-end test for recipe search with nutrition criteria using chat interface" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "chat" +timeout: 300000 + +input: + message: "Find me 3 healthy dinner recipes for a family of 4 that are under 500 calories per serving and take less than 30 minutes to prepare." 
+ reasoning: "End-to-end test validating recipe search workflow with specific nutritional and time criteria" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Provides exactly 3 dinner recipes suitable for family of 4" + - "Each recipe is under 500 calories per serving" + - "All recipes can be prepared in under 30 minutes" + - "Includes ingredient lists and cooking instructions" + - "Nutritional information is provided or estimated" + - "Recipes are practical and family-friendly" + - "Demonstrates intelligent recipe search and analysis" + - "Shows complete workflow from request to actionable meal plan" + +metadata: + tags: ["end-to-end", "chat", "recipes", "nutrition", "healthy", "family", "user-workflow"] + priority: "medium" + timeout: 300000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/end-to-end/travel-planning-001.yaml b/eval-server/nodejs/evals/end-to-end/travel-planning-001.yaml new file mode 100644 index 0000000..401f8b1 --- /dev/null +++ b/eval-server/nodejs/evals/end-to-end/travel-planning-001.yaml @@ -0,0 +1,40 @@ +# Barcelona Travel Planning - End-to-End Test +id: "travel-planning-001" +name: "Barcelona Trip Planning" +description: "End-to-end test for comprehensive travel planning using chat interface" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "chat" +timeout: 300000 + +input: + message: "Help me plan a 3-day trip to Barcelona. I need flight options from New York, hotel recommendations in the city center, and top 5 attractions to visit." 
+ reasoning: "End-to-end test validating complete travel planning workflow with dynamic tool usage" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Provides realistic flight options from New York to Barcelona" + - "Recommends hotels in Barcelona city center with details" + - "Lists top 5 attractions in Barcelona with descriptions" + - "Information is current and practically useful for trip planning" + - "Includes relevant details like prices, locations, or booking info" + - "Response is well-organized into clear sections" + - "Demonstrates multi-tool usage for comprehensive planning" + - "Shows complete workflow from request to actionable itinerary" + +metadata: + tags: ["end-to-end", "chat", "travel", "planning", "barcelona", "user-workflow"] + priority: "medium" + timeout: 300000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/basic-001.yaml b/eval-server/nodejs/evals/research-agent/basic-001.yaml new file mode 100644 index 0000000..fcd0086 --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/basic-001.yaml @@ -0,0 +1,39 @@ +# Basic research test - stable topic with clear sources +id: "basic-001" +name: "Research Chrome DevTools History" +description: "Research the history and development of Chrome DevTools" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 180000 + +input: + query: "History and development of Chrome DevTools browser developer tools" + reasoning: "Testing basic research capabilities on a well-documented technical topic" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0 + criteria: + - "Research covers the origins and early development of Chrome DevTools" + - "Information includes key milestones and major feature additions" + - "Sources include official documentation or 
reliable technical sources" + - "At least 3-5 different sources were consulted" + - "Information is factually accurate and up-to-date" + - "Research demonstrates understanding of the topic evolution" + - "Handoff to content_writer_agent occurred with comprehensive data" + +metadata: + tags: ["basic", "technical", "stable", "documentation"] + priority: "high" + timeout: 180000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/business-001.yaml b/eval-server/nodejs/evals/research-agent/business-001.yaml new file mode 100644 index 0000000..7558120 --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/business-001.yaml @@ -0,0 +1,39 @@ +# Business research test +id: "business-001" +name: "Research Remote Work Productivity" +description: "Research remote work impact on productivity and business outcomes" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 240000 + +input: + query: "Remote work productivity statistics impact business outcomes 2024 studies" + reasoning: "Testing business research requiring statistical data and multiple perspectives" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Research includes statistical data and survey results" + - "Covers multiple perspectives (employee, employer, industry)" + - "Sources include business publications, research studies, and reports" + - "Information addresses both positive and negative impacts" + - "Data is recent and relevant to current work trends" + - "Research demonstrates understanding of business implications" + - "Statistics and claims are properly sourced" + +metadata: + tags: ["business", "statistics", "workplace", "comprehensive"] + priority: "high" + timeout: 240000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git 
a/eval-server/nodejs/evals/research-agent/comparison-001.yaml b/eval-server/nodejs/evals/research-agent/comparison-001.yaml new file mode 100644 index 0000000..a9aa22b --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/comparison-001.yaml @@ -0,0 +1,39 @@ +# Comparative research test +id: "comparison-001" +name: "Compare JavaScript vs TypeScript" +description: "Research and compare JavaScript and TypeScript for web development" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 200000 + +input: + query: "JavaScript vs TypeScript comparison web development pros cons differences" + reasoning: "Testing comparative research requiring balanced analysis of multiple options" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Research covers both JavaScript and TypeScript comprehensively" + - "Includes clear comparison points (syntax, features, ecosystem)" + - "Presents advantages and disadvantages of each language" + - "Sources include technical documentation and developer resources" + - "Information is balanced and objective, not biased toward one option" + - "Demonstrates understanding of use cases for each language" + - "Research data is well-organized for comparative analysis" + +metadata: + tags: ["comparison", "technical", "programming", "balanced"] + priority: "high" + timeout: 200000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/current-001.yaml b/eval-server/nodejs/evals/research-agent/current-001.yaml new file mode 100644 index 0000000..6878868 --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/current-001.yaml @@ -0,0 +1,40 @@ +# Current events research test +id: "current-001" +name: "Research Latest AI Development Trends" +description: "Research recent developments in AI and machine learning (last 6 months)" 
+enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 240000 + +input: + query: "Latest AI artificial intelligence developments breakthroughs 2024 2025" + reasoning: "Testing research on current events and rapidly evolving topics" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + include_url: true + criteria: + - "Research focuses on recent developments (within last 6 months)" + - "Covers multiple aspects of AI development (models, applications, research)" + - "Sources are current and from reputable news or research outlets" + - "Information includes specific examples or case studies" + - "Demonstrates ability to identify current trends vs older information" + - "Successfully gathered information from diverse source types" + - "Data is properly organized for content writer handoff" + +metadata: + tags: ["current-events", "ai", "dynamic", "trends"] + priority: "high" + timeout: 240000 + retries: 1 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/edge-001.yaml b/eval-server/nodejs/evals/research-agent/edge-001.yaml new file mode 100644 index 0000000..d75c2bf --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/edge-001.yaml @@ -0,0 +1,39 @@ +# No-results edge case test +id: "edge-001" +name: "Research Obscure Fictional Topic" +description: "Test handling of queries with very limited or no reliable sources" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 180000 + +input: + query: "quantum bluetooth watermelon encryption algorithm 2024" + reasoning: "Testing edge case handling when query yields no meaningful results" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Agent recognizes when query yields limited or 
unreliable results" + - "Demonstrates appropriate search strategy modification" + - "Does not fabricate information when sources are unavailable" + - "Gracefully handles lack of substantive results" + - "Still attempts handoff to content writer with available information" + - "Maintains professional approach despite limited data" + - "Shows appropriate uncertainty when information is sparse" + +metadata: + tags: ["edge-case", "no-results", "error-handling", "fictional"] + priority: "high" + timeout: 180000 + retries: 1 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/research-agent-basic-001.yaml b/eval-server/nodejs/evals/research-agent/research-agent-basic-001.yaml new file mode 100644 index 0000000..85743d5 --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/research-agent-basic-001.yaml @@ -0,0 +1,39 @@ +# Basic research test - stable topic with clear sources +id: "research-agent-basic-001" +name: "Research Chrome DevTools History" +description: "Research the history and development of Chrome DevTools" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 180000 + +input: + query: "History and development of Chrome DevTools browser developer tools" + reasoning: "Testing basic research capabilities on a well-documented technical topic" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0 + criteria: + - "Research covers the origins and early development of Chrome DevTools" + - "Information includes key milestones and major feature additions" + - "Sources include official documentation or reliable technical sources" + - "At least 3-5 different sources were consulted" + - "Information is factually accurate and up-to-date" + - "Research demonstrates understanding of the topic evolution" + - "Handoff to content_writer_agent occurred with comprehensive data" + 
+metadata: + tags: ["basic", "technical", "stable", "documentation"] + priority: "high" + timeout: 180000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/research-agent-business-001.yaml b/eval-server/nodejs/evals/research-agent/research-agent-business-001.yaml new file mode 100644 index 0000000..defeed1 --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/research-agent-business-001.yaml @@ -0,0 +1,39 @@ +# Business research test +id: "research-agent-business-001" +name: "Research Remote Work Productivity" +description: "Research remote work impact on productivity and business outcomes" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 240000 + +input: + query: "Remote work productivity statistics impact business outcomes 2024 studies" + reasoning: "Testing business research requiring statistical data and multiple perspectives" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Research includes statistical data and survey results" + - "Covers multiple perspectives (employee, employer, industry)" + - "Sources include business publications, research studies, and reports" + - "Information addresses both positive and negative impacts" + - "Data is recent and relevant to current work trends" + - "Research demonstrates understanding of business implications" + - "Statistics and claims are properly sourced" + +metadata: + tags: ["business", "statistics", "workplace", "comprehensive"] + priority: "high" + timeout: 240000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/research-agent-comparison-001.yaml b/eval-server/nodejs/evals/research-agent/research-agent-comparison-001.yaml new file mode 100644 index 0000000..a433a58 --- /dev/null +++ 
b/eval-server/nodejs/evals/research-agent/research-agent-comparison-001.yaml @@ -0,0 +1,39 @@ +# Comparative research test +id: "research-agent-comparison-001" +name: "Compare JavaScript vs TypeScript" +description: "Research and compare JavaScript and TypeScript for web development" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 200000 + +input: + query: "JavaScript vs TypeScript comparison web development pros cons differences" + reasoning: "Testing comparative research requiring balanced analysis of multiple options" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Research covers both JavaScript and TypeScript comprehensively" + - "Includes clear comparison points (syntax, features, ecosystem)" + - "Presents advantages and disadvantages of each language" + - "Sources include technical documentation and developer resources" + - "Information is balanced and objective, not biased toward one option" + - "Demonstrates understanding of use cases for each language" + - "Research data is well-organized for comparative analysis" + +metadata: + tags: ["comparison", "technical", "programming", "balanced"] + priority: "high" + timeout: 200000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/research-agent-current-001.yaml b/eval-server/nodejs/evals/research-agent/research-agent-current-001.yaml new file mode 100644 index 0000000..198c981 --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/research-agent-current-001.yaml @@ -0,0 +1,40 @@ +# Current events research test +id: "research-agent-current-001" +name: "Research Latest AI Development Trends" +description: "Research recent developments in AI and machine learning (last 6 months)" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 
5000 + +tool: "research_agent" +timeout: 240000 + +input: + query: "Latest AI artificial intelligence developments breakthroughs 2024 2025" + reasoning: "Testing research on current events and rapidly evolving topics" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + include_url: true + criteria: + - "Research focuses on recent developments (within last 6 months)" + - "Covers multiple aspects of AI development (models, applications, research)" + - "Sources are current and from reputable news or research outlets" + - "Information includes specific examples or case studies" + - "Demonstrates ability to identify current trends vs older information" + - "Successfully gathered information from diverse source types" + - "Data is properly organized for content writer handoff" + +metadata: + tags: ["current-events", "ai", "dynamic", "trends"] + priority: "high" + timeout: 240000 + retries: 1 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/research-agent-edge-001.yaml b/eval-server/nodejs/evals/research-agent/research-agent-edge-001.yaml new file mode 100644 index 0000000..234c832 --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/research-agent-edge-001.yaml @@ -0,0 +1,39 @@ +# No-results edge case test +id: "research-agent-edge-001" +name: "Research Obscure Fictional Topic" +description: "Test handling of queries with very limited or no reliable sources" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 180000 + +input: + query: "quantum bluetooth watermelon encryption algorithm 2024" + reasoning: "Testing edge case handling when query yields no meaningful results" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Agent recognizes when query yields limited or unreliable results" + - "Demonstrates 
appropriate search strategy modification" + - "Does not fabricate information when sources are unavailable" + - "Gracefully handles lack of substantive results" + - "Still attempts handoff to content writer with available information" + - "Maintains professional approach despite limited data" + - "Shows appropriate uncertainty when information is sparse" + +metadata: + tags: ["edge-case", "no-results", "error-handling", "fictional"] + priority: "high" + timeout: 180000 + retries: 1 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/research-agent-technical-001.yaml b/eval-server/nodejs/evals/research-agent/research-agent-technical-001.yaml new file mode 100644 index 0000000..c5e2540 --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/research-agent-technical-001.yaml @@ -0,0 +1,39 @@ +# Deep technical research test +id: "research-agent-technical-001" +name: "Research WebAssembly Performance" +description: "Deep dive research into WebAssembly performance characteristics and use cases" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 900000 + +input: + query: "WebAssembly WASM performance benchmarks use cases implementation details" + reasoning: "Testing deep technical research requiring specialized knowledge synthesis" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Research covers technical details of WebAssembly architecture" + - "Includes performance benchmarks and comparison data" + - "Discusses practical use cases and implementation scenarios" + - "Sources include technical specifications, benchmarks, and expert analysis" + - "Information demonstrates deep understanding of the technology" + - "Research addresses both benefits and limitations" + - "Technical accuracy is maintained throughout" + +metadata: + tags: ["technical", 
"deep-dive", "performance", "webassembly"] + priority: "high" + timeout: 900000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/research-agent-tools-001.yaml b/eval-server/nodejs/evals/research-agent/research-agent-tools-001.yaml new file mode 100644 index 0000000..44da108 --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/research-agent-tools-001.yaml @@ -0,0 +1,40 @@ +# Tool orchestration test - focuses on how well the agent uses available tools +id: "research-agent-tools-001" +name: "Research Python Framework Comparison" +description: "Research comparing Django vs Flask Python frameworks with focus on tool usage" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 240000 + +input: + query: "Django vs Flask Python web framework comparison features performance" + reasoning: "Testing effective orchestration of navigation, extraction, and fetching tools" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Agent effectively used navigate_url to access search engines" + - "Schema-based extraction was used to gather structured search results" + - "Fetcher tool was used to collect content from multiple URLs" + - "Navigation strategy was logical and systematic" + - "Tool usage demonstrated purposeful research progression" + - "Information from different tools was effectively synthesized" + - "At least 3-5 different sources were accessed and processed" + - "Final handoff included comprehensive data from all tools" + +metadata: + tags: ["tool-orchestration", "systematic", "python", "frameworks"] + priority: "high" + timeout: 240000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/technical-001.yaml 
b/eval-server/nodejs/evals/research-agent/technical-001.yaml new file mode 100644 index 0000000..f434081 --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/technical-001.yaml @@ -0,0 +1,39 @@ +# Deep technical research test +id: "technical-001" +name: "Research WebAssembly Performance" +description: "Deep dive research into WebAssembly performance characteristics and use cases" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 900000 + +input: + query: "WebAssembly WASM performance benchmarks use cases implementation details" + reasoning: "Testing deep technical research requiring specialized knowledge synthesis" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Research covers technical details of WebAssembly architecture" + - "Includes performance benchmarks and comparison data" + - "Discusses practical use cases and implementation scenarios" + - "Sources include technical specifications, benchmarks, and expert analysis" + - "Information demonstrates deep understanding of the technology" + - "Research addresses both benefits and limitations" + - "Technical accuracy is maintained throughout" + +metadata: + tags: ["technical", "deep-dive", "performance", "webassembly"] + priority: "high" + timeout: 900000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/research-agent/tools-001.yaml b/eval-server/nodejs/evals/research-agent/tools-001.yaml new file mode 100644 index 0000000..ae97430 --- /dev/null +++ b/eval-server/nodejs/evals/research-agent/tools-001.yaml @@ -0,0 +1,40 @@ +# Tool orchestration test - focuses on how well the agent uses available tools +id: "tools-001" +name: "Research Python Framework Comparison" +description: "Research comparing Django vs Flask Python frameworks with focus on tool usage" +enabled: true + +target: + url: 
"https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "research_agent" +timeout: 240000 + +input: + query: "Django vs Flask Python web framework comparison features performance" + reasoning: "Testing effective orchestration of navigation, extraction, and fetching tools" + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Agent effectively used navigate_url to access search engines" + - "Schema-based extraction was used to gather structured search results" + - "Fetcher tool was used to collect content from multiple URLs" + - "Navigation strategy was logical and systematic" + - "Tool usage demonstrated purposeful research progression" + - "Information from different tools was effectively synthesized" + - "At least 3-5 different sources were accessed and processed" + - "Final handoff included comprehensive data from all tools" + +metadata: + tags: ["tool-orchestration", "systematic", "python", "frameworks"] + priority: "high" + timeout: 240000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/schema-extractor/amazon-product-001.yaml b/eval-server/nodejs/evals/schema-extractor/amazon-product-001.yaml new file mode 100644 index 0000000..42e4738 --- /dev/null +++ b/eval-server/nodejs/evals/schema-extractor/amazon-product-001.yaml @@ -0,0 +1,78 @@ +# E-commerce product extraction test +id: "amazon-product-001" +name: "Extract Amazon Product Details" +description: "Extract product information from an Amazon product page" +enabled: true + +target: + url: "https://www.amazon.com/Obelisk-Climbing-Rustproof-Trellises-Clematis/dp/B0B4SBY6QD/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_data" +timeout: 60000 + +input: + schema: + type: "object" + properties: + product: + type: "object" + properties: + title: + type: "string" + brand: + type: "string" + price: + type: "object" + properties: + current: + type: 
"number" + currency: + type: "string" + rating: + type: "object" + properties: + average: + type: "number" + count: + type: "number" + images: + type: "array" + items: + type: "string" + format: "url" + features: + type: "array" + items: + type: "string" + required: + - "title" + - "price" + availability: + type: "string" + required: + - "product" + instruction: "Extract comprehensive product information including pricing, ratings, and key features" + reasoning: "Testing extraction from a dynamic e-commerce page with complex structure" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Product title is accurate and complete" + - "Price information is current and properly formatted" + - "Rating data includes both average and review count" + - "Image URLs are valid and accessible" + - "Key product features are captured" + - "All URLs are properly resolved (not node IDs)" + +metadata: + tags: ["ecommerce", "amazon", "product", "dynamic"] + priority: "high" + timeout: 60000 + retries: 3 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/schema-extractor/bbc-news-001.yaml b/eval-server/nodejs/evals/schema-extractor/bbc-news-001.yaml new file mode 100644 index 0000000..6843147 --- /dev/null +++ b/eval-server/nodejs/evals/schema-extractor/bbc-news-001.yaml @@ -0,0 +1,69 @@ +# News article extraction test +id: "bbc-news-001" +name: "Extract BBC News Article" +description: "Extract article content and metadata from a BBC News page" +enabled: true + +target: + url: "https://www.bbc.com/news/technology" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_data" +timeout: 30000 + +input: + schema: + type: "object" + properties: + headlines: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + summary: + type: "string" + url: + type: "string" + format: "url" + category: + type: "string" + required: + - "title" + mainStory: + 
type: "object" + properties: + headline: + type: "string" + summary: + type: "string" + url: + type: "string" + format: "url" + required: + - "headlines" + instruction: "Extract the main headlines and featured stories from the BBC Technology news section" + reasoning: "Testing extraction from a news aggregation page with multiple articles" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + include_url: true + criteria: + - "Headlines are current and relevant to technology news" + - "Article summaries provide meaningful context" + - "URLs link to valid BBC news articles" + - "Main story is properly identified" + - "All extracted content is in English" + +metadata: + tags: ["news", "bbc", "aggregation", "dynamic"] + priority: "high" + timeout: 30000 + retries: 2 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/schema-extractor/bing-search-001.yaml b/eval-server/nodejs/evals/schema-extractor/bing-search-001.yaml new file mode 100644 index 0000000..7e7d674 --- /dev/null +++ b/eval-server/nodejs/evals/schema-extractor/bing-search-001.yaml @@ -0,0 +1,70 @@ +# Bing Search results extraction test +id: "bing-search-001" +name: "Extract Bing Search Results" +description: "Extract search results from Bing search page" +enabled: true + +target: + url: "https://www.bing.com/search?q=web+scraping+best+practices" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_data" +timeout: 45000 + +input: + schema: + type: "object" + properties: + query: + type: "string" + searchResults: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + url: + type: "string" + format: "url" + snippet: + type: "string" + datePublished: + type: "string" + required: + - "title" + - "url" + - "snippet" + sidebarInfo: + type: "object" + properties: + title: + type: "string" + description: + type: "string" + source: + type: "string" + required: + - "searchResults" + 
instruction: "Extract search results including titles, URLs, snippets, and any sidebar information from Bing" + reasoning: "Testing extraction from Bing search results with different layout than Google" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Search results match the query intent" + - "Results include valid URLs and meaningful snippets" + - "Sidebar information is extracted when present" + - "No duplicate results in the list" + +metadata: + tags: ["search", "bing", "serp", "dynamic"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/schema-extractor/github-repo-001-streamlined.yaml b/eval-server/nodejs/evals/schema-extractor/github-repo-001-streamlined.yaml new file mode 100644 index 0000000..07532e7 --- /dev/null +++ b/eval-server/nodejs/evals/schema-extractor/github-repo-001-streamlined.yaml @@ -0,0 +1,66 @@ +# Simple structured data test (Streamlined version) +id: "github-repo-001-streamlined" +name: "Extract GitHub Repository Info (Streamlined)" +description: "Extract basic repository information from a GitHub page using streamlined extractor" +enabled: true + +target: + url: "https://github.com/microsoft/TypeScript" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_schema_streamlined" +timeout: 30000 + +input: + schema: + type: "object" + properties: + name: + type: "string" + description: + type: "string" + language: + type: "string" + stars: + type: "number" + forks: + type: "number" + topics: + type: "array" + items: + type: "string" + readme: + type: "object" + properties: + summary: + type: "string" + required: + - "name" + - "description" + instruction: "Extract repository metadata and basic statistics" + reasoning: "Testing extraction from a well-structured GitHub repository page" + +validation: + type: "hybrid" + snapshot: + exclude_paths: + - "stars" + - "forks" + 
structure_only: false + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Repository name matches the GitHub page" + - "Description accurately reflects the project purpose" + - "Programming language is correctly identified" + - "Topic tags are relevant to the project" + +metadata: + tags: ["github", "repository", "structured", "streamlined"] + priority: "high" + timeout: 30000 + retries: 1 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/schema-extractor/github-repo-001.yaml b/eval-server/nodejs/evals/schema-extractor/github-repo-001.yaml new file mode 100644 index 0000000..6693577 --- /dev/null +++ b/eval-server/nodejs/evals/schema-extractor/github-repo-001.yaml @@ -0,0 +1,66 @@ +# Simple structured data test +id: "github-repo-001" +name: "Extract GitHub Repository Info" +description: "Extract basic repository information from a GitHub page" +enabled: true + +target: + url: "https://github.com/microsoft/TypeScript" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_data" +timeout: 30000 + +input: + schema: + type: "object" + properties: + name: + type: "string" + description: + type: "string" + language: + type: "string" + stars: + type: "number" + forks: + type: "number" + topics: + type: "array" + items: + type: "string" + readme: + type: "object" + properties: + summary: + type: "string" + required: + - "name" + - "description" + instruction: "Extract repository metadata and basic statistics" + reasoning: "Testing extraction from a well-structured GitHub repository page" + +validation: + type: "hybrid" + snapshot: + exclude_paths: + - "stars" + - "forks" + structure_only: false + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Repository name matches the GitHub page" + - "Description accurately reflects the project purpose" + - "Programming language is correctly identified" + - "Topic tags are relevant to the project" + +metadata: + tags: 
["github", "repository", "structured"] + priority: "high" + timeout: 30000 + retries: 1 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/schema-extractor/google-flights-001.yaml b/eval-server/nodejs/evals/schema-extractor/google-flights-001.yaml new file mode 100644 index 0000000..ab2e53c --- /dev/null +++ b/eval-server/nodejs/evals/schema-extractor/google-flights-001.yaml @@ -0,0 +1,106 @@ +# Google Flights search extraction test +id: "google-flights-001" +name: "Extract Google Flights Search Results" +description: "Extract flight options from Google Flights search" +enabled: true + +target: + url: "https://www.google.com/travel/flights/search?tfs=CBwQAhojEgoyMDI1LTEyLTI0agwIAhIIL20vMGQ5anJyBwgBEgNTRk8aIxIKMjAyNS0xMi0zMWoHCAESA1NGT3IMCAISCC9tLzBkOWpyQAFIAXABggELCP___________wGYAQE" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_data" +timeout: 60000 + +input: + schema: + type: "object" + properties: + searchCriteria: + type: "object" + properties: + origin: + type: "string" + destination: + type: "string" + departureDate: + type: "string" + returnDate: + type: "string" + tripType: + type: "string" + passengers: + type: "number" + flights: + type: "array" + items: + type: "object" + properties: + airline: + type: "string" + flightNumber: + type: "string" + departureTime: + type: "string" + arrivalTime: + type: "string" + duration: + type: "string" + stops: + type: "number" + price: + type: "object" + properties: + amount: + type: "number" + currency: + type: "string" + cabin: + type: "string" + bookingUrl: + type: "string" + format: "url" + legroom: + type: "string" + amenities: + type: "array" + items: + type: "string" + required: + - "airline" + - "departureTime" + - "arrivalTime" + - "price" + priceInsights: + type: "object" + properties: + trend: + type: "string" + recommendation: + type: "string" + averagePrice: + type: "number" + required: + - "flights" + instruction: "Extract flight 
options including airlines, times, prices, and amenities from Google Flights results" + reasoning: "Testing extraction from complex travel search interface with dynamic pricing" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Flight times are in proper format" + - "Prices are numeric values with currency" + - "Airlines and flight numbers are accurate" + - "Stop information is correctly identified" + - "Duration is in readable format" + +metadata: + tags: ["travel", "flights", "google", "booking"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/schema-extractor/google-search-001.yaml b/eval-server/nodejs/evals/schema-extractor/google-search-001.yaml new file mode 100644 index 0000000..5763ba8 --- /dev/null +++ b/eval-server/nodejs/evals/schema-extractor/google-search-001.yaml @@ -0,0 +1,76 @@ +# Google Search results extraction test +id: "google-search-001" +name: "Extract Google Search Results" +description: "Extract search results from Google search page" +enabled: true + +target: + url: "https://www.google.com/search?q=chrome+devtools+tutorial" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_data" +timeout: 45000 + +input: + schema: + type: "object" + properties: + query: + type: "string" + searchResults: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + url: + type: "string" + format: "url" + snippet: + type: "string" + domain: + type: "string" + required: + - "title" + - "url" + - "snippet" + featuredSnippet: + type: "object" + properties: + content: + type: "string" + source: + type: "string" + url: + type: "string" + format: "url" + relatedSearches: + type: "array" + items: + type: "string" + required: + - "searchResults" + instruction: "Extract the top 10 search results with titles, URLs, and snippets. 
Also extract featured snippet if present and related searches" + reasoning: "Testing extraction from Google search results page with various result types" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Search results are relevant to the query" + - "Each result has a valid title, URL, and snippet" + - "URLs are properly resolved and not node IDs" + - "Related searches are extracted if present" + - "Featured snippet is captured when available" + +metadata: + tags: ["search", "google", "serp", "dynamic"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/schema-extractor/homedepot-001.yaml b/eval-server/nodejs/evals/schema-extractor/homedepot-001.yaml new file mode 100644 index 0000000..2eb4883 --- /dev/null +++ b/eval-server/nodejs/evals/schema-extractor/homedepot-001.yaml @@ -0,0 +1,92 @@ +# Home Depot product search extraction test +id: "homedepot-001" +name: "Extract Home Depot Product Search" +description: "Extract product listings from Home Depot search results" +enabled: true + +target: + url: "https://www.homedepot.com/s/power%2520drill" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_data" +timeout: 60000 + +input: + schema: + type: "object" + properties: + searchQuery: + type: "string" + totalResults: + type: "number" + products: + type: "array" + items: + type: "object" + properties: + name: + type: "string" + brand: + type: "string" + price: + type: "number" + originalPrice: + type: "number" + savings: + type: "number" + rating: + type: "number" + reviewCount: + type: "number" + productUrl: + type: "string" + format: "url" + imageUrl: + type: "string" + format: "url" + availability: + type: "string" + features: + type: "array" + items: + type: "string" + required: + - "name" + - "price" + - "productUrl" + filters: + type: "object" + properties: + brands: + type: "array" + items: 
+ type: "string" + priceRanges: + type: "array" + items: + type: "string" + required: + - "products" + instruction: "Extract product listings from Home Depot search results including prices, ratings, and availability" + reasoning: "Testing extraction from e-commerce search results with product cards and filters" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Products are relevant to the search query" + - "Prices are numeric values in USD" + - "Product URLs link to Home Depot product pages" + - "Ratings are on a 5-star scale" + - "Key product features are captured" + +metadata: + tags: ["ecommerce", "homedepot", "products", "search"] + priority: "high" + timeout: 60000 + retries: 3 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/schema-extractor/macys-001.yaml b/eval-server/nodejs/evals/schema-extractor/macys-001.yaml new file mode 100644 index 0000000..81e05f9 --- /dev/null +++ b/eval-server/nodejs/evals/schema-extractor/macys-001.yaml @@ -0,0 +1,106 @@ +# Macy's product listing extraction test +id: "macys-001" +name: "Extract Macy's Product Listings" +description: "Extract fashion products from Macy's category page" +enabled: true + +target: + url: "https://www.macys.com/shop/womens-clothing/womens-dresses" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_data" +timeout: 60000 + +input: + schema: + type: "object" + properties: + category: + type: "string" + totalProducts: + type: "number" + products: + type: "array" + items: + type: "object" + properties: + name: + type: "string" + brand: + type: "string" + currentPrice: + type: "number" + originalPrice: + type: "number" + discount: + type: "string" + colors: + type: "array" + items: + type: "string" + sizes: + type: "array" + items: + type: "string" + rating: + type: "number" + reviewCount: + type: "number" + productUrl: + type: "string" + format: "url" + imageUrl: + type: "string"
+ format: "url" + promotions: + type: "array" + items: + type: "string" + required: + - "name" + - "brand" + - "currentPrice" + refinements: + type: "object" + properties: + brands: + type: "array" + items: + type: "string" + sizes: + type: "array" + items: + type: "string" + colors: + type: "array" + items: + type: "string" + priceRanges: + type: "array" + items: + type: "string" + required: + - "products" + instruction: "Extract fashion products including prices, sizes, colors, and promotional offers from Macy's" + reasoning: "Testing extraction from fashion e-commerce with complex product attributes" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Products are from the correct category" + - "Prices reflect current and sale prices" + - "Color and size options are captured" + - "Brand names are accurately extracted" + - "Promotional text is included when present" + +metadata: + tags: ["ecommerce", "macys", "fashion", "products"] + priority: "high" + timeout: 60000 + retries: 3 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/schema-extractor/wikipedia-search-001.yaml b/eval-server/nodejs/evals/schema-extractor/wikipedia-search-001.yaml new file mode 100644 index 0000000..616f0d6 --- /dev/null +++ b/eval-server/nodejs/evals/schema-extractor/wikipedia-search-001.yaml @@ -0,0 +1,77 @@ +# Wikipedia search results extraction test +id: "wikipedia-search-001" +name: "Extract Wikipedia Search Results" +description: "Extract search results from Wikipedia search" +enabled: true + +target: + url: "https://en.wikipedia.org/w/index.php?search=artificial+intelligence&title=Special:Search" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_data" +timeout: 30000 + +input: + schema: + type: "object" + properties: + searchTerm: + type: "string" + resultCount: + type: "number" + searchResults: + type: "array" + items: + type: "object" + properties: + 
title: + type: "string" + url: + type: "string" + format: "url" + snippet: + type: "string" + category: + type: "string" + wordCount: + type: "number" + lastEdited: + type: "string" + required: + - "title" + - "url" + - "snippet" + suggestedArticles: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + url: + type: "string" + format: "url" + required: + - "searchResults" + instruction: "Extract Wikipedia search results including article titles, URLs, snippets, and metadata like word count or last edit date" + reasoning: "Testing extraction from Wikipedia's internal search with rich metadata" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Search results are Wikipedia articles" + - "Each result has a valid Wikipedia URL" + - "Snippets contain relevant content highlights" + - "Metadata like word count is extracted when available" + +metadata: + tags: ["search", "wikipedia", "encyclopedia"] + priority: "high" + timeout: 30000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/screenshot-verification/dynamic-content-verification-001.yaml b/eval-server/nodejs/evals/screenshot-verification/dynamic-content-verification-001.yaml new file mode 100644 index 0000000..6ec53c4 --- /dev/null +++ b/eval-server/nodejs/evals/screenshot-verification/dynamic-content-verification-001.yaml @@ -0,0 +1,45 @@ +# Dynamic content visual verification test +id: "dynamic-content-verification-001" +name: "Dynamic Content Visual Verification" +description: "Test visual verification of dynamic content loading using screenshots" +enabled: true + +target: + url: "https://the-internet.herokuapp.com/dynamic_loading/1" + +tool: "action_agent" +timeout: 90000 + +input: + objective: "Take a screenshot, click the Start button, wait for content to load, then take another screenshot to verify the dynamic content appeared" + reasoning: "Testing visual 
verification of dynamic content changes using screenshot comparison" + hint: "Use take_screenshot before clicking Start, then again after the dynamic content loads" + + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4o" + criteria: + - "Initial screenshot captured the page before dynamic loading" + - "Start button was successfully clicked" + - "Agent waited for dynamic content to fully load" + - "Final screenshot shows the revealed dynamic content" + - "Visual comparison demonstrates successful content loading verification" + - "Screenshots show clear before/after difference in content visibility" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare screenshots to verify dynamic content loading" + - "Confirm the first screenshot shows hidden content area" + - "Verify the second screenshot shows the revealed 'Hello World!' text" + - "Check that the loading animation or process is properly captured" + +metadata: + tags: ["screenshot", "dynamic-content", "visual-verification", "loading"] + priority: "high" + timeout: 90000 + retries: 2 + flaky: true \ No newline at end of file diff --git a/eval-server/nodejs/evals/screenshot-verification/screenshot-error-handling-001.yaml b/eval-server/nodejs/evals/screenshot-verification/screenshot-error-handling-001.yaml new file mode 100644 index 0000000..6d31c50 --- /dev/null +++ b/eval-server/nodejs/evals/screenshot-verification/screenshot-error-handling-001.yaml @@ -0,0 +1,42 @@ +# Screenshot error handling test +id: "screenshot-error-handling-001" +name: "Screenshot Error Handling" +description: "Test screenshot tool error handling and recovery" +enabled: true + +target: + url: "https://httpstat.us/500" + +tool: "take_screenshot" +timeout: 30000 + +input: + fullPage: false + + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4o" + criteria: + - "Screenshot tool handled the error page gracefully" + - "Either successfully captured the error page or 
reported appropriate error" + - "No crashes or undefined behavior occurred" + - "Tool response is meaningful regardless of page loading issues" + - "Error handling demonstrates robustness of screenshot functionality" + visual_verification: + enabled: true + capture_before: false + capture_after: true + prompts: + - "If screenshot was taken, verify it shows the error page content" + - "Check that the tool handled the HTTP 500 error appropriately" + - "Confirm no blank or corrupted screenshots were produced" + - "Ensure error scenarios are handled professionally" + +metadata: + tags: ["screenshot", "error-handling", "robustness", "edge-case"] + priority: "normal" + timeout: 30000 + retries: 1 + flaky: true \ No newline at end of file diff --git a/eval-server/nodejs/evals/screenshot-verification/screenshot-fullpage-001.yaml b/eval-server/nodejs/evals/screenshot-verification/screenshot-fullpage-001.yaml new file mode 100644 index 0000000..a1c71f9 --- /dev/null +++ b/eval-server/nodejs/evals/screenshot-verification/screenshot-fullpage-001.yaml @@ -0,0 +1,43 @@ +# Full page screenshot verification test +id: "screenshot-fullpage-001" +name: "Take Full Page Screenshot" +description: "Test taking full page screenshot and verify functionality" +enabled: true + +target: + url: "https://en.wikipedia.org/wiki/Chrome_DevTools" + +tool: "take_screenshot" +timeout: 45000 + +input: + fullPage: true + + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4o" + criteria: + - "Full page screenshot was successfully captured" + - "Data URL contains valid image data" + - "Screenshot captures the entire page content including areas below the fold" + - "Image size is larger than viewport-only screenshot would be" + - "No errors occurred during full page capture" + - "Screenshot includes both header and footer content" + visual_verification: + enabled: true + capture_before: false + capture_after: true + prompts: + - "Verify the screenshot shows the complete Wikipedia article page" 
+ - "Check that content above and below the fold is captured" + - "Confirm the image is taller than a typical viewport" + - "Ensure no content is cut off at the bottom" + +metadata: + tags: ["screenshot", "fullpage", "visual", "verification", "wikipedia"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: false \ No newline at end of file diff --git a/eval-server/nodejs/evals/screenshot-verification/screenshot-viewport-001.yaml b/eval-server/nodejs/evals/screenshot-verification/screenshot-viewport-001.yaml new file mode 100644 index 0000000..69531ee --- /dev/null +++ b/eval-server/nodejs/evals/screenshot-verification/screenshot-viewport-001.yaml @@ -0,0 +1,42 @@ +# Viewport screenshot verification test +id: "screenshot-viewport-001" +name: "Take Viewport Screenshot" +description: "Test taking viewport screenshot and verify functionality" +enabled: true + +target: + url: "https://www.google.com" + +tool: "take_screenshot" +timeout: 30000 + +input: + fullPage: false + + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4o" + criteria: + - "Screenshot was successfully captured" + - "Data URL is properly formatted and contains image data" + - "Screenshot shows the viewport content correctly" + - "No errors occurred during screenshot capture" + - "Image data length indicates a valid screenshot was taken" + visual_verification: + enabled: true + capture_before: false + capture_after: true + prompts: + - "Verify the screenshot shows the Google homepage" + - "Check that the screenshot is not empty or corrupted" + - "Confirm the image quality is appropriate for verification" + - "Ensure the screenshot captures the current viewport accurately" + +metadata: + tags: ["screenshot", "viewport", "visual", "verification"] + priority: "high" + timeout: 30000 + retries: 2 + flaky: false \ No newline at end of file diff --git a/eval-server/nodejs/evals/screenshot-verification/visual-comparison-001.yaml 
b/eval-server/nodejs/evals/screenshot-verification/visual-comparison-001.yaml new file mode 100644 index 0000000..7434a93 --- /dev/null +++ b/eval-server/nodejs/evals/screenshot-verification/visual-comparison-001.yaml @@ -0,0 +1,45 @@ +# Visual comparison verification test +id: "visual-comparison-001" +name: "Visual Comparison Before and After Action" +description: "Test visual verification by comparing screenshots before and after an action" +enabled: true + +target: + url: "https://www.google.com" + +tool: "action_agent" +timeout: 60000 + +input: + objective: "Take a screenshot, then type 'DevTools testing' in the search box, and take another screenshot to compare" + reasoning: "Testing visual verification workflow with before/after screenshot comparison" + hint: "Use take_screenshot tool before and after performing the search input action" + + +validation: + type: "llm-judge" + llm_judge: + model: "gpt-4o" + criteria: + - "Initial screenshot was taken before performing any actions" + - "Search text was successfully entered into the search field" + - "Second screenshot was taken after the text input" + - "Visual comparison shows the difference between before and after states" + - "Search field contains the entered text in the final screenshot" + - "Screenshots demonstrate successful action verification workflow" + visual_verification: + enabled: true + capture_before: true + capture_after: true + prompts: + - "Compare the before and after screenshots" + - "Verify the search field is empty in the first screenshot" + - "Confirm the search field contains 'DevTools testing' in the second screenshot" + - "Check that the visual changes accurately reflect the performed action" + +metadata: + tags: ["screenshot", "visual-comparison", "action-verification", "before-after"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: false \ No newline at end of file diff --git a/eval-server/nodejs/evals/streamlined-schema-extractor/amazon-product-001.yaml 
b/eval-server/nodejs/evals/streamlined-schema-extractor/amazon-product-001.yaml new file mode 100644 index 0000000..b154454 --- /dev/null +++ b/eval-server/nodejs/evals/streamlined-schema-extractor/amazon-product-001.yaml @@ -0,0 +1,78 @@ +# E-commerce product extraction test (Streamlined) +id: "amazon-product-001" +name: "Extract Amazon Product Details" +description: "Extract product information from an Amazon product page" +enabled: true + +target: + url: "https://www.amazon.com/Obelisk-Climbing-Rustproof-Trellises-Clematis/dp/B0B4SBY6QD/" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_schema_streamlined" +timeout: 60000 + +input: + schema: + type: "object" + properties: + product: + type: "object" + properties: + title: + type: "string" + brand: + type: "string" + price: + type: "object" + properties: + current: + type: "number" + currency: + type: "string" + rating: + type: "object" + properties: + average: + type: "number" + count: + type: "number" + images: + type: "array" + items: + type: "string" + format: "url" + features: + type: "array" + items: + type: "string" + required: + - "title" + - "price" + availability: + type: "string" + required: + - "product" + instruction: "Extract comprehensive product information including pricing, ratings, and key features" + reasoning: "Testing extraction from a dynamic e-commerce page with complex structure" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Product title is accurate and complete" + - "Price information is current and properly formatted" + - "Rating data includes both average and review count" + - "Image URLs are valid and accessible" + - "Key product features are captured" + - "All URLs are properly resolved (not node IDs)" + +metadata: + tags: ["ecommerce", "amazon", "product", "dynamic"] + priority: "high" + timeout: 60000 + retries: 3 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git 
a/eval-server/nodejs/evals/streamlined-schema-extractor/bbc-news-001.yaml b/eval-server/nodejs/evals/streamlined-schema-extractor/bbc-news-001.yaml new file mode 100644 index 0000000..31ef288 --- /dev/null +++ b/eval-server/nodejs/evals/streamlined-schema-extractor/bbc-news-001.yaml @@ -0,0 +1,69 @@ +# News article extraction test (Streamlined) +id: "bbc-news-001" +name: "Extract BBC News Article" +description: "Extract article content and metadata from a BBC News page" +enabled: true + +target: + url: "https://www.bbc.com/news/technology" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_schema_streamlined" +timeout: 30000 + +input: + schema: + type: "object" + properties: + headlines: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + summary: + type: "string" + url: + type: "string" + format: "url" + category: + type: "string" + required: + - "title" + mainStory: + type: "object" + properties: + headline: + type: "string" + summary: + type: "string" + url: + type: "string" + format: "url" + required: + - "headlines" + instruction: "Extract the main headlines and featured stories from the BBC Technology news section" + reasoning: "Testing extraction from a news aggregation page with multiple articles" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + include_url: true + criteria: + - "Headlines are current and relevant to technology news" + - "Article summaries provide meaningful context" + - "URLs link to valid BBC news articles" + - "Main story is properly identified" + - "All extracted content is in English" + +metadata: + tags: ["news", "bbc", "aggregation", "dynamic"] + priority: "high" + timeout: 30000 + retries: 2 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/streamlined-schema-extractor/bing-search-001.yaml b/eval-server/nodejs/evals/streamlined-schema-extractor/bing-search-001.yaml new file mode 100644 
index 0000000..e9f3b6e --- /dev/null +++ b/eval-server/nodejs/evals/streamlined-schema-extractor/bing-search-001.yaml @@ -0,0 +1,70 @@ +# Bing Search results extraction test +id: "bing-search-001" +name: "Extract Bing Search Results" +description: "Extract search results from Bing search page" +enabled: true + +target: + url: "https://www.bing.com/search?q=web+scraping+best+practices" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_schema_streamlined" +timeout: 45000 + +input: + schema: + type: "object" + properties: + query: + type: "string" + searchResults: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + url: + type: "string" + format: "url" + snippet: + type: "string" + datePublished: + type: "string" + required: + - "title" + - "url" + - "snippet" + sidebarInfo: + type: "object" + properties: + title: + type: "string" + description: + type: "string" + source: + type: "string" + required: + - "searchResults" + instruction: "Extract search results including titles, URLs, snippets, and any sidebar information from Bing" + reasoning: "Testing extraction from Bing search results with different layout than Google" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Search results match the query intent" + - "Results include valid URLs and meaningful snippets" + - "Sidebar information is extracted when present" + - "No duplicate results in the list" + +metadata: + tags: ["search", "bing", "serp", "dynamic"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/streamlined-schema-extractor/github-repo-001.yaml b/eval-server/nodejs/evals/streamlined-schema-extractor/github-repo-001.yaml new file mode 100644 index 0000000..5c496c5 --- /dev/null +++ b/eval-server/nodejs/evals/streamlined-schema-extractor/github-repo-001.yaml @@ -0,0 +1,66 @@ +# Simple structured data 
test (Streamlined) +id: "github-repo-001" +name: "Extract GitHub Repository Info" +description: "Extract basic repository information from a GitHub page" +enabled: true + +target: + url: "https://github.com/microsoft/TypeScript" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_schema_streamlined" +timeout: 30000 + +input: + schema: + type: "object" + properties: + name: + type: "string" + description: + type: "string" + language: + type: "string" + stars: + type: "number" + forks: + type: "number" + topics: + type: "array" + items: + type: "string" + readme: + type: "object" + properties: + summary: + type: "string" + required: + - "name" + - "description" + instruction: "Extract repository metadata and basic statistics" + reasoning: "Testing extraction from a well-structured GitHub repository page" + +validation: + type: "hybrid" + snapshot: + exclude_paths: + - "stars" + - "forks" + structure_only: false + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Repository name matches the GitHub page" + - "Description accurately reflects the project purpose" + - "Programming language is correctly identified" + - "Topic tags are relevant to the project" + +metadata: + tags: ["github", "repository", "structured"] + priority: "high" + timeout: 30000 + retries: 1 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/streamlined-schema-extractor/google-flights-001.yaml b/eval-server/nodejs/evals/streamlined-schema-extractor/google-flights-001.yaml new file mode 100644 index 0000000..981ccbd --- /dev/null +++ b/eval-server/nodejs/evals/streamlined-schema-extractor/google-flights-001.yaml @@ -0,0 +1,106 @@ +# Google Flights search extraction test +id: "google-flights-001" +name: "Extract Google Flights Search Results" +description: "Extract flight options from Google Flights search" +enabled: true + +target: + url: 
"https://www.google.com/travel/flights/search?tfs=CBwQAhojEgoyMDI1LTEyLTI0agwIAhIIL20vMGQ5anJyBwgBEgNTRk8aIxIKMjAyNS0xMi0zMWoHCAESA1NGT3IMCAISCC9tLzBkOWpyQAFIAXABggELCP___________wGYAQE" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_schema_streamlined" +timeout: 60000 + +input: + schema: + type: "object" + properties: + searchCriteria: + type: "object" + properties: + origin: + type: "string" + destination: + type: "string" + departureDate: + type: "string" + returnDate: + type: "string" + tripType: + type: "string" + passengers: + type: "number" + flights: + type: "array" + items: + type: "object" + properties: + airline: + type: "string" + flightNumber: + type: "string" + departureTime: + type: "string" + arrivalTime: + type: "string" + duration: + type: "string" + stops: + type: "number" + price: + type: "object" + properties: + amount: + type: "number" + currency: + type: "string" + cabin: + type: "string" + bookingUrl: + type: "string" + format: "url" + legroom: + type: "string" + amenities: + type: "array" + items: + type: "string" + required: + - "airline" + - "departureTime" + - "arrivalTime" + - "price" + priceInsights: + type: "object" + properties: + trend: + type: "string" + recommendation: + type: "string" + averagePrice: + type: "number" + required: + - "flights" + instruction: "Extract flight options including airlines, times, prices, and amenities from Google Flights results" + reasoning: "Testing extraction from complex travel search interface with dynamic pricing" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Flight times are in proper format" + - "Prices are numeric values with currency" + - "Airlines and flight numbers are accurate" + - "Stop information is correctly identified" + - "Duration is in readable format" + +metadata: + tags: ["travel", "flights", "google", "booking"] + priority: "high" + timeout: 60000 + retries: 2 + flaky: true + owner: "devtools-team" \ 
No newline at end of file diff --git a/eval-server/nodejs/evals/streamlined-schema-extractor/google-search-001.yaml b/eval-server/nodejs/evals/streamlined-schema-extractor/google-search-001.yaml new file mode 100644 index 0000000..c1725d4 --- /dev/null +++ b/eval-server/nodejs/evals/streamlined-schema-extractor/google-search-001.yaml @@ -0,0 +1,76 @@ +# Google Search results extraction test +id: "google-search-001" +name: "Extract Google Search Results" +description: "Extract search results from Google search page" +enabled: true + +target: + url: "https://www.google.com/search?q=chrome+devtools+tutorial" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_schema_streamlined" +timeout: 45000 + +input: + schema: + type: "object" + properties: + query: + type: "string" + searchResults: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + url: + type: "string" + format: "url" + snippet: + type: "string" + domain: + type: "string" + required: + - "title" + - "url" + - "snippet" + featuredSnippet: + type: "object" + properties: + content: + type: "string" + source: + type: "string" + url: + type: "string" + format: "url" + relatedSearches: + type: "array" + items: + type: "string" + required: + - "searchResults" + instruction: "Extract the top 10 search results with titles, URLs, and snippets. 
Also extract featured snippet if present and related searches" + reasoning: "Testing extraction from Google search results page with various result types" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Search results are relevant to the query" + - "Each result has a valid title, URL, and snippet" + - "URLs are properly resolved and not node IDs" + - "Related searches are extracted if present" + - "Featured snippet is captured when available" + +metadata: + tags: ["search", "google", "serp", "dynamic"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/streamlined-schema-extractor/homedepot-001.yaml b/eval-server/nodejs/evals/streamlined-schema-extractor/homedepot-001.yaml new file mode 100644 index 0000000..1d26848 --- /dev/null +++ b/eval-server/nodejs/evals/streamlined-schema-extractor/homedepot-001.yaml @@ -0,0 +1,92 @@ +# Home Depot product search extraction test +id: "homedepot-001" +name: "Extract Home Depot Product Search" +description: "Extract product listings from Home Depot search results" +enabled: true + +target: + url: "https://www.homedepot.com/s/power%2520drill" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_schema_streamlined" +timeout: 60000 + +input: + schema: + type: "object" + properties: + searchQuery: + type: "string" + totalResults: + type: "number" + products: + type: "array" + items: + type: "object" + properties: + name: + type: "string" + brand: + type: "string" + price: + type: "number" + originalPrice: + type: "number" + savings: + type: "number" + rating: + type: "number" + reviewCount: + type: "number" + productUrl: + type: "string" + format: "url" + imageUrl: + type: "string" + format: "url" + availability: + type: "string" + features: + type: "array" + items: + type: "string" + required: + - "name" + - "price" + - "productUrl" + filters: + type: 
"object" + properties: + brands: + type: "array" + items: + type: "string" + priceRanges: + type: "array" + items: + type: "string" + required: + - "products" + instruction: "Extract product listings from Home Depot search results including prices, ratings, and availability" + reasoning: "Testing extraction from e-commerce search results with product cards and filters" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Products are relevant to the search query" + - "Prices are numeric values in USD" + - "Product URLs link to Home Depot product pages" + - "Ratings are on a 5-star scale" + - "Key product features are captured" + +metadata: + tags: ["ecommerce", "homedepot", "products", "search"] + priority: "high" + timeout: 60000 + retries: 3 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/streamlined-schema-extractor/macys-001.yaml b/eval-server/nodejs/evals/streamlined-schema-extractor/macys-001.yaml new file mode 100644 index 0000000..28a2c10 --- /dev/null +++ b/eval-server/nodejs/evals/streamlined-schema-extractor/macys-001.yaml @@ -0,0 +1,106 @@ +# Macy's product listing extraction test +id: "macys-001" +name: "Extract Macy's Product Listings" +description: "Extract fashion products from Macy's category page" +enabled: true + +target: + url: "https://www.macys.com/shop/womens-clothing/womens-dresses" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_schema_streamlined" +timeout: 60000 + +input: + schema: + type: "object" + properties: + category: + type: "string" + totalProducts: + type: "number" + products: + type: "array" + items: + type: "object" + properties: + name: + type: "string" + brand: + type: "string" + currentPrice: + type: "number" + originalPrice: + type: "number" + discount: + type: "string" + colors: + type: "array" + items: + type: "string" + sizes: + type: "array" + items: + type: "string" + rating: + type: "number" 
+ reviewCount: + type: "number" + productUrl: + type: "string" + format: "url" + imageUrl: + type: "string" + format: "url" + promotions: + type: "array" + items: + type: "string" + required: + - "name" + - "brand" + - "currentPrice" + refinements: + type: "object" + properties: + brands: + type: "array" + items: + type: "string" + sizes: + type: "array" + items: + type: "string" + colors: + type: "array" + items: + type: "string" + priceRanges: + type: "array" + items: + type: "string" + required: + - "products" + instruction: "Extract fashion products including prices, sizes, colors, and promotional offers from Macy's" + reasoning: "Testing extraction from fashion e-commerce with complex product attributes" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Products are from the correct category" + - "Prices reflect current and sale prices" + - "Color and size options are captured" + - "Brand names are accurately extracted" + - "Promotional text is included when present" + +metadata: + tags: ["ecommerce", "macys", "fashion", "products"] + priority: "high" + timeout: 60000 + retries: 3 + flaky: true + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/streamlined-schema-extractor/wikipedia-001.yaml b/eval-server/nodejs/evals/streamlined-schema-extractor/wikipedia-001.yaml new file mode 100644 index 0000000..88983bd --- /dev/null +++ b/eval-server/nodejs/evals/streamlined-schema-extractor/wikipedia-001.yaml @@ -0,0 +1,76 @@ +# Wikipedia article extraction test (Streamlined) +id: "wikipedia-chrome-devtools-001" +name: "Extract Chrome DevTools Wikipedia Article" +description: "Extract structured information from the Chrome DevTools Wikipedia page" +enabled: true + +target: + url: "https://en.wikipedia.org/wiki/Chrome_DevTools" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "extract_schema_streamlined" +timeout: 45000 + +input: + schema: + type: "object" + 
properties: + title: + type: "string" + summary: + type: "string" + tableOfContents: + type: "array" + items: + type: "string" + infobox: + type: "object" + properties: + developer: + type: "string" + initialRelease: + type: "string" + operatingSystem: + type: "string" + license: + type: "string" + externalLinks: + type: "array" + items: + type: "object" + properties: + text: + type: "string" + url: + type: "string" + format: "url" + required: + - "title" + - "summary" + instruction: "Extract the main article information including title, summary, table of contents, and infobox details" + reasoning: "Testing extraction from a stable, well-structured Wikipedia page" + +validation: + type: "hybrid" + snapshot: + exclude_paths: + - "externalLinks[*].url" + structure_only: false + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Article title matches the Wikipedia page title" + - "Summary captures the main description of Chrome DevTools" + - "Table of contents includes major sections" + - "Infobox contains key technical details" + - "External links are properly resolved URLs" + +metadata: + tags: ["wikipedia", "documentation", "stable"] + priority: "high" + timeout: 45000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/streamlined-schema-extractor/wikipedia-search-001.yaml b/eval-server/nodejs/evals/streamlined-schema-extractor/wikipedia-search-001.yaml new file mode 100644 index 0000000..c432c20 --- /dev/null +++ b/eval-server/nodejs/evals/streamlined-schema-extractor/wikipedia-search-001.yaml @@ -0,0 +1,77 @@ +# Wikipedia search results extraction test +id: "wikipedia-search-001" +name: "Extract Wikipedia Search Results" +description: "Extract search results from Wikipedia search" +enabled: true + +target: + url: "https://en.wikipedia.org/w/index.php?search=artificial+intelligence&title=Special:Search" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: 
"extract_schema_streamlined" +timeout: 30000 + +input: + schema: + type: "object" + properties: + searchTerm: + type: "string" + resultCount: + type: "number" + searchResults: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + url: + type: "string" + format: "url" + snippet: + type: "string" + category: + type: "string" + wordCount: + type: "number" + lastEdited: + type: "string" + required: + - "title" + - "url" + - "snippet" + suggestedArticles: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + url: + type: "string" + format: "url" + required: + - "searchResults" + instruction: "Extract Wikipedia search results including article titles, URLs, snippets, and metadata like word count or last edit date" + reasoning: "Testing extraction from Wikipedia's internal search with rich metadata" + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4.1-mini" + temperature: 0.3 + criteria: + - "Search results are Wikipedia articles" + - "Each result has a valid Wikipedia URL" + - "Snippets contain relevant content highlights" + - "Metadata like word count is extracted when available" + +metadata: + tags: ["search", "wikipedia", "encyclopedia"] + priority: "high" + timeout: 30000 + retries: 2 + flaky: false + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/booking-001.yaml b/eval-server/nodejs/evals/web-task-agent/booking-001.yaml new file mode 100644 index 0000000..8a99d17 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/booking-001.yaml @@ -0,0 +1,45 @@ +# Hotel Search Workflow - Web Task Agent +id: "booking-001" +name: "Hotel Search Workflow" +description: "Test web task agent orchestrating complex multi-step booking search" +enabled: true + +target: + url: "https://www.booking.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for hotels in San Francisco for 2 
adults, check-in March 15, check-out March 17" + reasoning: "Customer is looking for travel booking" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully searched for hotels in San Francisco" + - "Results show hotels available for March 15-17 dates" + - "Guest count of 2 adults is reflected in the search results" + - "Returned multiple hotel options with relevant details" + - "Each hotel includes essential information (name, price, location)" + - "Results are presented in a clear, readable format" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify hotel search results are displayed for San Francisco" + - "Check that dates March 15-17 are correctly selected" + - "Confirm guest count shows 2 adults" + - "Ensure search results show hotels with availability for specified dates" + +metadata: + tags: ["web-task", "booking", "workflow", "multi-step", "travel", "complex"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/ecommerce-001.yaml b/eval-server/nodejs/evals/web-task-agent/ecommerce-001.yaml new file mode 100644 index 0000000..338f464 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/ecommerce-001.yaml @@ -0,0 +1,53 @@ +# E-commerce web task evaluation (matches DevTools test case) +id: "ecommerce-001" +name: "E-commerce Product Search" +description: "Test web task agent handling product search on shopping site" +enabled: true + +target: + url: "https://www.amazon.com" + +tool: "web_task_agent" +timeout: 90000 + +input: + task: "Search Amazon for \"wireless headphones\" and find products under $100" + reasoning: "Testing e-commerce search workflow with price filtering" + context: "User wants to find wireless headphones with specific price constraint" + extraction_schema: + type: "object" + properties: + products: + type: "array" + 
items: + type: "object" + properties: + name: + type: "string" + price: + type: "string" + rating: + type: "string" + url: + type: "string" + + +validation: + type: "hybrid" + llm_judge: + model: "gpt-4o" + criteria: + - "Successfully navigated to product search" + - "Applied appropriate filters correctly" + - "Extracted product details accurately" + - "Provided meaningful comparison of features" + - "Stayed within specified price range" + snapshot: + structure_only: true + exclude_paths: + - "timestamp" + - "sessionId" + +metadata: + tags: ["web-task", "multi-step", "ecommerce", "search"] + priority: "high" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/error-001.yaml b/eval-server/nodejs/evals/web-task-agent/error-001.yaml new file mode 100644 index 0000000..1831a14 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/error-001.yaml @@ -0,0 +1,45 @@ +# Error Recovery Workflow - Web Task Agent +id: "error-001" +name: "Error Recovery Workflow" +description: "Test web task agent handling action_agent failures and retry logic" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for \"nonexistent test query 12345\" and handle any issues that arise" + reasoning: "Customer is asking for this response" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Attempted to search for the unusual query \"nonexistent test query 12345\"" + - "Either found some results OR provided clear explanation why no results were found" + - "Response handles the edge case gracefully without errors" + - "If no results found, suggested alternative actions or explanations" + - "Maintained professional tone despite unusual request" + - "Final output is coherent and helpful to the user" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true 
+ verification_prompts: + - "Check if search was attempted despite unusual query" + - "Verify error handling did not break the page interaction" + - "Confirm agent attempted to complete the task or provided clear error info" + - "Ensure page is still functional after error recovery attempts" + +metadata: + tags: ["web-task", "error-recovery", "retry", "orchestration", "robustness"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/extract-001.yaml b/eval-server/nodejs/evals/web-task-agent/extract-001.yaml new file mode 100644 index 0000000..e836aa0 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/extract-001.yaml @@ -0,0 +1,60 @@ +# Structured Data Extraction - Web Task Agent +id: "extract-001" +name: "Structured Data Extraction" +description: "Test web task agent extracting structured data from search results" +enabled: true + +target: + url: "https://news.ycombinator.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Extract the top 5 Hacker News stories with their titles, scores, and comment counts" + reasoning: "User is looking to understand the top stories on Hacker News" + extraction_schema: + type: "object" + properties: + stories: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + score: + type: "number" + comments: + type: "number" + url: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully returned exactly 5 Hacker News stories in structured text format" + - "Each story is numbered (1., 2., 3., 4., 5.) 
with title, score, comments, and URL" + - "Results are presented in readable text format similar to the example provided" + - "Response includes all required fields: title, score, comments count, URL" + - "Maintained proper orchestration pattern throughout the extraction process" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Hacker News homepage is loaded and displaying stories" + - "Check that top stories are visible with scores and comment counts" + - "Confirm story titles and metadata are clearly displayed" + - "Ensure page structure allows for data extraction" + +metadata: + tags: ["web-task", "data-extraction", "structured-data", "hackernews", "schema"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/finance-001.yaml b/eval-server/nodejs/evals/web-task-agent/finance-001.yaml new file mode 100644 index 0000000..2c661ed --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/finance-001.yaml @@ -0,0 +1,68 @@ +# Stock Information Research - Web Task Agent +id: "finance-001" +name: "Stock Information Research" +description: "Test extracting stock prices and financial information" +enabled: true + +target: + url: "https://finance.yahoo.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for Apple (AAPL) stock information and extract current price, market cap, and recent performance" + reasoning: "Users need automated financial data collection for investment decisions" + extraction_schema: + type: "object" + properties: + stock_info: + type: "object" + properties: + symbol: + type: "string" + company_name: + type: "string" + current_price: + type: "string" + change: + type: "string" + change_percent: + type: "string" + market_cap: + type: "string" + pe_ratio: + type: "string" + volume: + type: "string" + + +validation: + type: 
"llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully found Apple (AAPL) stock information" + - "Current stock price is clearly stated" + - "Market cap information is included" + - "Price change and percentage change are provided" + - "Additional metrics (PE ratio, volume) included when available" + - "Financial data is current and presented in readable text format (not JSON)" + - "Stock information is well-organized and easy to understand" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Yahoo Finance shows Apple (AAPL) stock page" + - "Check that current stock price and change are visible" + - "Confirm market cap and trading volume are displayed" + - "Ensure financial metrics and charts are shown" + +metadata: + tags: ["web-task", "finance", "stocks", "yahoo-finance", "investment", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/flight-001.yaml b/eval-server/nodejs/evals/web-task-agent/flight-001.yaml new file mode 100644 index 0000000..f74b255 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/flight-001.yaml @@ -0,0 +1,45 @@ +# Complex Flight Search - Web Task Agent +id: "flight-001" +name: "Complex Flight Search" +description: "Test web task agent handling complex flight search with multiple criteria" +enabled: true + +target: + url: "https://www.kayak.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for round-trip flights from Seattle (SEA) to Tokyo (NRT) departing March 20, returning March 30" + reasoning: "Customer is looking for finding the best flight options" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully found round-trip flights from Seattle (SEA) to Tokyo (NRT)" + - "Flight results show 
March 20 departure date" + - "Flight results show March 30 return date" + - "Returned multiple flight options with airlines and prices" + - "Each flight includes essential details (times, airlines, prices)" + - "Results clearly distinguish between outbound and return flights" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify flight search results are displayed" + - "Check SEA to NRT route is correctly selected" + - "Confirm dates March 20 departure and March 30 return" + - "Ensure flight options are showing with prices and airlines" + +metadata: + tags: ["web-task", "flight", "travel", "multi-step", "kayak", "round-trip"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/food-001.yaml b/eval-server/nodejs/evals/web-task-agent/food-001.yaml new file mode 100644 index 0000000..382b470 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/food-001.yaml @@ -0,0 +1,68 @@ +# Restaurant Search and Menu Extraction - Web Task Agent +id: "food-001" +name: "Restaurant Search and Menu Extraction" +description: "Test searching restaurants and extracting menu information" +enabled: true + +target: + url: "https://www.yelp.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for \"Italian restaurants near me\" in San Francisco and extract restaurant details" + reasoning: "Users want to quickly compare restaurants, menus, and reviews" + extraction_schema: + type: "object" + properties: + restaurants: + type: "array" + items: + type: "object" + properties: + name: + type: "string" + rating: + type: "string" + price_range: + type: "string" + cuisine: + type: "string" + address: + type: "string" + phone: + type: "string" + hours: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - 
"Successfully found Italian restaurants in San Francisco" + - "Each restaurant includes name, rating, and price range" + - "Location/address information is provided for each restaurant" + - "Contact details (phone/hours) included when available" + - "All restaurants listed serve Italian cuisine" + - "Results are presented in clear, structured text format (not JSON)" + - "Restaurants are numbered or organized clearly for easy comparison" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Yelp search results for Italian restaurants" + - "Check that restaurants show ratings and price ranges" + - "Confirm location filter shows San Francisco results" + - "Ensure restaurant listings include contact information" + +metadata: + tags: ["web-task", "restaurants", "yelp", "food", "local-search", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/iframe-001.yaml b/eval-server/nodejs/evals/web-task-agent/iframe-001.yaml new file mode 100644 index 0000000..a9234e5 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/iframe-001.yaml @@ -0,0 +1,83 @@ +# ANA Airlines Iframe Content Extraction - Web Task Agent +id: "iframe-001" +name: "ANA Airlines Iframe Content Extraction" +description: "Test web task agent handling iframe-heavy airline booking sites like ANA Airlines" +enabled: true + +target: + url: "https://aswbe.ana.co.jp/webapps/reservation/flight-search?CONNECTION_KIND=SEA&LANG=en&hiddenSearchMode=ROUND_TRIP&departureDate:field=20260320&returnDate:field=20260330&departureAirportCode:field=SEA&arrivalAirportCode:field=NRT&adultCount=1&youngAdultCount=0&childCount=0&infantCount=0&boardingClass=INTY001&searchFlag=1" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Navigate the ANA Airlines flight search page and extract available flight options 
from Seattle (SEA) to Tokyo Narita (NRT) for March 20-30, 2026. Handle any iframe content and booking interface elements." + reasoning: "Testing iframe content extraction and complex airline booking site navigation" + extraction_schema: + type: "object" + properties: + flights: + type: "array" + items: + type: "object" + properties: + flight_number: + type: "string" + airline: + type: "string" + departure_time: + type: "string" + arrival_time: + type: "string" + departure_date: + type: "string" + arrival_date: + type: "string" + duration: + type: "string" + aircraft: + type: "string" + price: + type: "string" + cabin_class: + type: "string" + stops: + type: "string" + booking_interface_status: + type: "string" + iframe_content_found: + type: "boolean" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully navigated ANA Airlines booking interface" + - "Handled iframe content correctly (iframe_content_found should be true if iframes detected)" + - "Extracted flight information from ANA flight search results" + - "Flight details include ANA flight numbers and accurate route (SEA to NRT)" + - "Extracted pricing information in appropriate currency" + - "Handled any booking interface elements, popups, or navigation flows" + - "Results show flights for the correct dates (March 20-30, 2026)" + - "Successfully demonstrated iframe content extraction capabilities" + - "Booking interface status indicates successful page interaction" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify ANA Airlines flight search page loaded correctly" + - "Check that search parameters show SEA to NRT route" + - "Confirm flight results are displayed (may be in iframes)" + - "Ensure booking interface elements are functional" + - "Verify flight information is accessible and extractable" + +metadata: + tags: ["web-task", "iframe", "ana-airlines", 
"complex-booking", "international-flight", "airline-specific"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/jobs-001.yaml b/eval-server/nodejs/evals/web-task-agent/jobs-001.yaml new file mode 100644 index 0000000..7a6caa8 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/jobs-001.yaml @@ -0,0 +1,68 @@ +# Job Search Workflow - Web Task Agent +id: "jobs-001" +name: "Job Search Workflow" +description: "Test web task agent orchestrating job search on LinkedIn" +enabled: true + +target: + url: "https://www.linkedin.com/jobs" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for \"Software Engineer\" jobs in \"San Francisco\" and extract details for the first 5 results" + reasoning: "User wants to find job opportunities in tech industry" + extraction_schema: + type: "object" + properties: + jobs: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + company: + type: "string" + location: + type: "string" + salary: + type: "string" + description: + type: "string" + url: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Either used construct_direct_url for LinkedIn job search OR used traditional form interaction" + - "If using direct URL: constructed proper LinkedIn job search URL with keywords and location" + - "If using forms: delegated keyword and location input to action_agent" + - "Extracted job listings using extract_data" + - "Returned structured job data in readable text format (not JSON)" + - "Each job listing includes title, company, location, and other relevant fields" + - "Results are numbered or organized clearly for easy reading" + - "Demonstrated proper workflow orchestration for job search" + - "Never used direct browser interaction tools" + visual_verification: + enabled: true + 
capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify LinkedIn job search results are displayed" + - "Check that search shows Software Engineer jobs in San Francisco" + - "Confirm job listings include company names and titles" + - "Ensure at least 5 job results are visible" + +metadata: + tags: ["web-task", "jobs", "linkedin", "search", "career", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/learning-001.yaml b/eval-server/nodejs/evals/web-task-agent/learning-001.yaml new file mode 100644 index 0000000..1e4c761 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/learning-001.yaml @@ -0,0 +1,69 @@ +# Online Course Search - Web Task Agent +id: "learning-001" +name: "Online Course Search" +description: "Test searching and extracting course information from learning platforms" +enabled: true + +target: + url: "https://www.coursera.org" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for \"Machine Learning\" courses and extract details for top 5 results" + reasoning: "Users want to compare courses across platforms for learning decisions" + extraction_schema: + type: "object" + properties: + courses: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + instructor: + type: "string" + university: + type: "string" + rating: + type: "string" + duration: + type: "string" + price: + type: "string" + description: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully found Machine Learning courses on Coursera" + - "Returned details for top 5 courses as requested" + - "Each course includes title, instructor, university, and rating" + - "Duration and pricing information included for each course" + - "Course descriptions or key topics are provided" + - "Results are 
presented in structured text format (not JSON)" + - "Courses are numbered (1-5) and well-organized for easy comparison" + - "Each course entry is clearly formatted and readable" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Coursera search results for Machine Learning" + - "Check that courses show titles, instructors, and ratings" + - "Confirm course details include duration and pricing" + - "Ensure search results are relevant to Machine Learning" + +metadata: + tags: ["web-task", "education", "coursera", "courses", "learning", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/nav-001.yaml b/eval-server/nodejs/evals/web-task-agent/nav-001.yaml new file mode 100644 index 0000000..bff519f --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/nav-001.yaml @@ -0,0 +1,46 @@ +# Site Navigation Workflow - Web Task Agent +id: "nav-001" +name: "Site Navigation Workflow" +description: "Test web task agent orchestrating navigation between different sections of a site" +enabled: true + +target: + url: "https://www.wikipedia.org" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 90000 + +input: + task: "Navigate to the Wikipedia homepage, search for \"artificial intelligence\", and find information about machine learning" + reasoning: "User is looking to explore Wikipedia content through structured navigation" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Orchestrated Wikipedia search via action_agent calls" + - "Navigated to artificial intelligence article through action_agent" + - "Located machine learning section via action_agent coordination" + - "Extracted relevant information about machine learning" + - "Demonstrated multi-step navigation workflow" + - "Maintained orchestration pattern throughout 
navigation" + - "Provided structured summary of found information" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify navigation reached artificial intelligence Wikipedia page" + - "Check that machine learning section or content is visible" + - "Confirm successful navigation through multiple page sections" + - "Ensure content related to machine learning is displayed" + +metadata: + tags: ["web-task", "navigation", "multi-step", "wikipedia", "content-exploration"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/news-001.yaml b/eval-server/nodejs/evals/web-task-agent/news-001.yaml new file mode 100644 index 0000000..4c29aed --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/news-001.yaml @@ -0,0 +1,64 @@ +# News Article Aggregation - Web Task Agent +id: "news-001" +name: "News Article Aggregation" +description: "Test aggregating news headlines and summaries from news sites" +enabled: true + +target: + url: "https://news.ycombinator.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Extract the top 10 Hacker News stories with titles, scores, and first few comments" + reasoning: "Users want automated news monitoring for research and awareness" + extraction_schema: + type: "object" + properties: + articles: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + score: + type: "number" + comments_count: + type: "number" + url: + type: "string" + top_comment: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully extracted 10 Hacker News stories as requested" + - "Each story includes title, score, and comment count" + - "URLs are provided for each story" + - "Stories appear to be from the current top/front page" + - "Results are 
presented in clear, numbered text format (1-10), not JSON" + - "All required fields are present and properly formatted in readable text" + - "Each story is clearly separated and easy to read" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Hacker News stories are visible with scores" + - "Check that story titles and comment counts are shown" + - "Confirm top stories section is properly displayed" + - "Ensure story metadata is accessible for extraction" + +metadata: + tags: ["web-task", "news", "hackernews", "aggregation", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/realestate-001.yaml b/eval-server/nodejs/evals/web-task-agent/realestate-001.yaml new file mode 100644 index 0000000..5fd824e --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/realestate-001.yaml @@ -0,0 +1,70 @@ +# Real Estate Property Search - Web Task Agent +id: "realestate-001" +name: "Real Estate Property Search" +description: "Test property search workflow on real estate platforms" +enabled: true + +target: + url: "https://www.zillow.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for houses for sale in Austin, Texas under $500k and extract property details" + reasoning: "User wants to find affordable housing options in a specific location" + extraction_schema: + type: "object" + properties: + properties: + type: "array" + items: + type: "object" + properties: + address: + type: "string" + price: + type: "string" + bedrooms: + type: "number" + bathrooms: + type: "number" + sqft: + type: "string" + lot_size: + type: "string" + year_built: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Orchestrated location search via action_agent" + - "Delegated price filter setting 
to action_agent" + - "Coordinated property type selection through action_agent" + - "Applied search filters through proper action_agent calls" + - "Extracted property listings with extract_data" + - "Returned structured property data in readable text format (not JSON)" + - "Each property includes address, price, bedrooms, bathrooms, and other key details" + - "Properties are clearly numbered or organized for easy comparison" + - "Demonstrated complex real estate search workflow orchestration" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Zillow search results for Austin, Texas properties" + - "Check that properties shown are under $500k" + - "Confirm property listings show price, beds, baths info" + - "Ensure search results match the specified criteria" + +metadata: + tags: ["web-task", "real-estate", "zillow", "property-search", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/scroll-001.yaml b/eval-server/nodejs/evals/web-task-agent/scroll-001.yaml new file mode 100644 index 0000000..12a986f --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/scroll-001.yaml @@ -0,0 +1,61 @@ +# Infinite Scroll Content Loading - Web Task Agent +id: "scroll-001" +name: "Infinite Scroll Content Loading" +description: "Test web task agent handling infinite scroll pages to load more content" +enabled: true + +target: + url: "https://twitter.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Scroll down the Twitter feed to load at least 20 tweets and extract their content" + reasoning: "Testing infinite scroll functionality for dynamic content loading" + extraction_schema: + type: "object" + properties: + tweets: + type: "array" + items: + type: "object" + properties: + author: + type: "string" + content: + type: "string" + likes: + type: 
"string" + retweets: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully used scroll_page tool to scroll down the page" + - "Loaded additional content through scrolling actions" + - "Extracted at least 20 tweets from the feed" + - "Each tweet includes author and content information" + - "Demonstrated proper handling of dynamically loaded content" + - "Results are presented in clear, numbered text format" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify initial Twitter feed is loaded" + - "Check that scrolling action loaded additional tweets" + - "Confirm at least 20 tweets are visible after scrolling" + - "Ensure page scrolled down significantly from initial position" + +metadata: + tags: ["web-task", "scrolling", "infinite-scroll", "dynamic-content", "twitter"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/scroll-002.yaml b/eval-server/nodejs/evals/web-task-agent/scroll-002.yaml new file mode 100644 index 0000000..dce0156 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/scroll-002.yaml @@ -0,0 +1,65 @@ +# Product Review Scrolling - Web Task Agent +id: "scroll-002" +name: "Product Review Scrolling" +description: "Test scrolling to load more product reviews on e-commerce sites" +enabled: true + +target: + url: "https://www.amazon.com/dp/B09B8V1LZ3" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Scroll down to the reviews section and load more reviews by scrolling, then extract review details" + reasoning: "Users need to see multiple reviews beyond initial visible ones" + extraction_schema: + type: "object" + properties: + reviews: + type: "array" + items: + type: "object" + properties: + rating: + type: "string" + title: + type: "string" + author: 
+ type: "string" + date: + type: "string" + verified: + type: "boolean" + content: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Used scroll_page tool to navigate to reviews section" + - "Scrolled within reviews area to load additional reviews" + - "Extracted multiple product reviews with ratings" + - "Each review includes rating, author, and content" + - "Successfully handled lazy-loaded review content" + - "Presented reviews in structured, readable format" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Amazon product page is loaded" + - "Check that page scrolled to reviews section" + - "Confirm additional reviews loaded after scrolling" + - "Ensure review content is fully visible" + +metadata: + tags: ["web-task", "scrolling", "reviews", "amazon", "e-commerce"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/scroll-003.yaml b/eval-server/nodejs/evals/web-task-agent/scroll-003.yaml new file mode 100644 index 0000000..df7eaba --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/scroll-003.yaml @@ -0,0 +1,61 @@ +# News Article Progressive Loading - Web Task Agent +id: "scroll-003" +name: "News Article Progressive Loading" +description: "Test scrolling through news sites that load articles progressively" +enabled: true + +target: + url: "https://medium.com/topic/technology" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Scroll down to load more technology articles and extract titles and authors for at least 15 articles" + reasoning: "Testing progressive content loading on news/blog platforms" + extraction_schema: + type: "object" + properties: + articles: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + author: + type: 
"string" + reading_time: + type: "string" + preview: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Used scroll_page tool multiple times to load content" + - "Successfully loaded at least 15 articles through scrolling" + - "Extracted article titles and author information" + - "Handled Medium's progressive loading mechanism" + - "Articles are from technology topic as requested" + - "Results presented in clear, numbered format" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Medium technology page is loaded" + - "Check that initial articles are visible" + - "Confirm scrolling loaded additional articles" + - "Ensure at least 15 articles are visible after scrolling" + +metadata: + tags: ["web-task", "scrolling", "progressive-loading", "medium", "articles"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/scroll-004.yaml b/eval-server/nodejs/evals/web-task-agent/scroll-004.yaml new file mode 100644 index 0000000..e9b3534 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/scroll-004.yaml @@ -0,0 +1,61 @@ +# Search Results Infinite Scroll - Web Task Agent +id: "scroll-004" +name: "Search Results Infinite Scroll" +description: "Test handling search results that use infinite scroll instead of pagination" +enabled: true + +target: + url: "https://www.pinterest.com/search/pins/?q=web%20design" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for \"web design\" pins and scroll to load at least 30 results, then extract pin details" + reasoning: "Testing infinite scroll on visual search platforms" + extraction_schema: + type: "object" + properties: + pins: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + description: + type: "string" 
+ saves: + type: "string" + source: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully performed search for \"web design\" pins" + - "Used scroll_page tool to trigger infinite scroll loading" + - "Loaded at least 30 pins through scrolling actions" + - "Extracted pin titles and metadata" + - "Handled Pinterest's masonry layout and lazy loading" + - "Results are well-organized and readable" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Pinterest search results for web design" + - "Check initial pins are displayed" + - "Confirm scrolling loaded many more pins" + - "Ensure grid layout shows 30+ pins after scrolling" + +metadata: + tags: ["web-task", "scrolling", "infinite-scroll", "pinterest", "visual-search"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/scroll-005.yaml b/eval-server/nodejs/evals/web-task-agent/scroll-005.yaml new file mode 100644 index 0000000..47c8769 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/scroll-005.yaml @@ -0,0 +1,73 @@ +# Google Flights Scroll and Show More - Web Task Agent +id: "scroll-005" +name: "Google Flights Scroll and Show More" +description: "Test scrolling and clicking \"Show more flights\" button on Google Flights to load additional flight options" +enabled: true + +target: + url: "https://www.google.com/travel/flights?sca_esv=646eedf97dcc8cf2&source=flun&uitype=cuAA&hl=en&gl=us&curr=USD&tfs=CAEQAhoeEgoyMDI2LTAzLTIwagcIARIDU0VBcgcIARIDTlJUGh4SCjIwMjYtMDMtMzBqBwgBEgNOUlRyBwgBEgNTRUF6aENqUklhVFJJTVVwVlZVOXpNakJCUTJodGVFRkNSeTB0TFMwdExTMHRjR3BpYjI4eE0wRkJRVUZCUjJoc1lsWlZRV2RYUlZsQkVnTmpTMFVhQ3dqUXNnVVFBaG9EVlZORU9EQncwTElG" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Extract the initial flight results, 
then scroll down and click \"Show more flights\" button to load additional flights. Extract at least 20 total flight options from Seattle to Tokyo." + reasoning: "Testing combination of scrolling and button clicking to load more flight results on Google Flights" + extraction_schema: + type: "object" + properties: + flights: + type: "array" + items: + type: "object" + properties: + airline: + type: "string" + departure_time: + type: "string" + arrival_time: + type: "string" + duration: + type: "string" + stops: + type: "string" + price: + type: "string" + aircraft: + type: "string" + total_flights_found: + type: "number" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully extracted initial flight results from Google Flights" + - "Used scroll_page tool to scroll down the flight results list" + - "Located and clicked \"Show more flights\" button using action_agent" + - "Loaded additional flight options beyond the initial set" + - "Extracted at least 20 total flights from Seattle (SEA) to Tokyo (NRT)" + - "Each flight includes airline, times, duration, stops, and price" + - "Flights are for the correct dates (March 20-30, 2026)" + - "Results are presented in clear, numbered format" + - "Successfully combined scrolling and clicking actions to load more content" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Google Flights page shows SEA to NRT flights" + - "Check that initial flight results are displayed" + - "Confirm scrolling occurred and \"Show more flights\" button was visible" + - "Ensure additional flights loaded after clicking the button" + - "Verify at least 20 flight options are now visible" + +metadata: + tags: ["web-task", "scrolling", "google-flights", "click-action", "load-more", "travel"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git 
a/eval-server/nodejs/evals/web-task-agent/search-001.yaml b/eval-server/nodejs/evals/web-task-agent/search-001.yaml new file mode 100644 index 0000000..da3a4eb --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/search-001.yaml @@ -0,0 +1,41 @@ +# Basic web task search evaluation (matches DevTools test case) +id: "search-001" +name: "Site-Specific Search Task" +description: "Test web task agent orchestrating a search workflow on a specific site" +enabled: true + +target: + url: "chrome://new-tab-page" + +tool: "web_task_agent" +timeout: 60000 + +input: + task: "Search Google for \"Chrome DevTools automation\" and extract the top 3 search results" + reasoning: "Testing basic site-specific search workflow orchestration" + context: "Need to demonstrate web_task_agent can coordinate multiple action_agent calls for a complete search workflow" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o" + criteria: + - "Successfully returned exactly 3 search results in structured text format" + - "Each result is numbered (1., 2., 3.) 
and contains a title related to \"Chrome DevTools automation\"" + - "Each result includes a URL in the format \"URL: [link]\"" + - "Results are presented in a clear, readable text format (not JSON)" + - "Response includes a brief summary or conclusion statement" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify search was completed and results page is showing" + - "Check that search results are related to \"Chrome DevTools automation\"" + - "Confirm at least 3 search results are visible on the page" + - "Ensure the search workflow was completed successfully" + +metadata: + tags: ["web-task", "orchestration", "search", "workflow", "google", "basic"] + priority: "normal" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/social-001.yaml b/eval-server/nodejs/evals/web-task-agent/social-001.yaml new file mode 100644 index 0000000..a35ebfd --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/social-001.yaml @@ -0,0 +1,60 @@ +# Social Media Content Extraction - Web Task Agent +id: "social-001" +name: "Social Media Content Extraction" +description: "Test extracting trending topics and posts from social media" +enabled: true + +target: + url: "https://twitter.com/explore" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Extract the top 5 trending topics from Twitter/X explore page" + reasoning: "User wants to stay updated on current trends" + extraction_schema: + type: "object" + properties: + trends: + type: "array" + items: + type: "object" + properties: + topic: + type: "string" + posts_count: + type: "string" + category: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully accessed Twitter/X explore page and found trending topics" + - "Returned exactly 5 trending topics as requested" + - "Each topic includes the trend name/hashtag" + 
- "Post counts or metrics are included when available" + - "Topics are current/recent trends (not outdated)" + - "Results are presented in clear, numbered text format (not JSON)" + - "Each trend is properly numbered (1., 2., 3., etc.) for readability" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Twitter/X explore page is loaded" + - "Check that trending topics section is visible" + - "Confirm trending topics show names and post counts" + - "Ensure page shows current trending content" + +metadata: + tags: ["web-task", "social-media", "twitter", "trends", "extraction", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-booking-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-booking-001.yaml new file mode 100644 index 0000000..a2842b6 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-booking-001.yaml @@ -0,0 +1,45 @@ +# Hotel Search Workflow - Web Task Agent +id: "web-task-agent-booking-001" +name: "Hotel Search Workflow" +description: "Test web task agent orchestrating complex multi-step booking search" +enabled: true + +target: + url: "https://www.booking.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for hotels in San Francisco for 2 adults, check-in March 15, check-out March 17" + reasoning: "Customer is looking for travel booking" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully searched for hotels in San Francisco" + - "Results show hotels available for March 15-17 dates" + - "Guest count of 2 adults is reflected in the search results" + - "Returned multiple hotel options with relevant details" + - "Each hotel includes essential information (name, price, location)" + - "Results are 
presented in a clear, readable format" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify hotel search results are displayed for San Francisco" + - "Check that dates March 15-17 are correctly selected" + - "Confirm guest count shows 2 adults" + - "Ensure search results show hotels with availability for specified dates" + +metadata: + tags: ["web-task", "booking", "workflow", "multi-step", "travel", "complex"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-ecommerce-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-ecommerce-001.yaml new file mode 100644 index 0000000..a6b9735 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-ecommerce-001.yaml @@ -0,0 +1,53 @@ +# E-commerce web task evaluation (matches DevTools test case) +id: "web-task-agent-ecommerce-001" +name: "E-commerce Product Search" +description: "Test web task agent handling product search on shopping site" +enabled: true + +target: + url: "https://www.amazon.com" + +tool: "web_task_agent" +timeout: 90000 + +input: + task: "Search Amazon for \"wireless headphones\" and find products under $100" + reasoning: "Testing e-commerce search workflow with price filtering" + context: "User wants to find wireless headphones with specific price constraint" + extraction_schema: + type: "object" + properties: + products: + type: "array" + items: + type: "object" + properties: + name: + type: "string" + price: + type: "string" + rating: + type: "string" + url: + type: "string" + + +validation: + type: "hybrid" + llm_judge: + model: "gpt-4o" + criteria: + - "Successfully navigated to product search" + - "Applied appropriate filters correctly" + - "Extracted product details accurately" + - "Provided meaningful comparison of features" + - "Stayed within specified price range" + snapshot: + structure_only: 
true + exclude_paths: + - "timestamp" + - "sessionId" + +metadata: + tags: ["web-task", "multi-step", "ecommerce", "search"] + priority: "high" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-error-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-error-001.yaml new file mode 100644 index 0000000..cc5c7df --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-error-001.yaml @@ -0,0 +1,45 @@ +# Error Recovery Workflow - Web Task Agent +id: "web-task-agent-error-001" +name: "Error Recovery Workflow" +description: "Test web task agent handling action_agent failures and retry logic" +enabled: true + +target: + url: "https://www.google.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for \"nonexistent test query 12345\" and handle any issues that arise" + reasoning: "Customer is asking for this response" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Attempted to search for the unusual query \"nonexistent test query 12345\"" + - "Either found some results OR provided clear explanation why no results were found" + - "Response handles the edge case gracefully without errors" + - "If no results found, suggested alternative actions or explanations" + - "Maintained professional tone despite unusual request" + - "Final output is coherent and helpful to the user" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Check if search was attempted despite unusual query" + - "Verify error handling did not break the page interaction" + - "Confirm agent attempted to complete the task or provided clear error info" + - "Ensure page is still functional after error recovery attempts" + +metadata: + tags: ["web-task", "error-recovery", "retry", "orchestration", "robustness"] + priority: "high" + 
owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-extract-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-extract-001.yaml new file mode 100644 index 0000000..14eadcb --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-extract-001.yaml @@ -0,0 +1,60 @@ +# Structured Data Extraction - Web Task Agent +id: "web-task-agent-extract-001" +name: "Structured Data Extraction" +description: "Test web task agent extracting structured data from search results" +enabled: true + +target: + url: "https://news.ycombinator.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Extract the top 5 Hacker News stories with their titles, scores, and comment counts" + reasoning: "User is looking to understand the top stories on Hacker News" + extraction_schema: + type: "object" + properties: + stories: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + score: + type: "number" + comments: + type: "number" + url: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully returned exactly 5 Hacker News stories in structured text format" + - "Each story is numbered (1., 2., 3., 4., 5.) 
with title, score, comments, and URL" + - "Results are presented in readable text format similar to the example provided" + - "Response includes all required fields: title, score, comments count, URL" + - "Maintained proper orchestration pattern throughout the extraction process" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Hacker News homepage is loaded and displaying stories" + - "Check that top stories are visible with scores and comment counts" + - "Confirm story titles and metadata are clearly displayed" + - "Ensure page structure allows for data extraction" + +metadata: + tags: ["web-task", "data-extraction", "structured-data", "hackernews", "schema"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-finance-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-finance-001.yaml new file mode 100644 index 0000000..8f7a2b0 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-finance-001.yaml @@ -0,0 +1,68 @@ +# Stock Information Research - Web Task Agent +id: "web-task-agent-finance-001" +name: "Stock Information Research" +description: "Test extracting stock prices and financial information" +enabled: true + +target: + url: "https://finance.yahoo.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for Apple (AAPL) stock information and extract current price, market cap, and recent performance" + reasoning: "Users need automated financial data collection for investment decisions" + extraction_schema: + type: "object" + properties: + stock_info: + type: "object" + properties: + symbol: + type: "string" + company_name: + type: "string" + current_price: + type: "string" + change: + type: "string" + change_percent: + type: "string" + market_cap: + type: "string" + pe_ratio: + type: 
"string" + volume: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully found Apple (AAPL) stock information" + - "Current stock price is clearly stated" + - "Market cap information is included" + - "Price change and percentage change are provided" + - "Additional metrics (PE ratio, volume) included when available" + - "Financial data is current and presented in readable text format (not JSON)" + - "Stock information is well-organized and easy to understand" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Yahoo Finance shows Apple (AAPL) stock page" + - "Check that current stock price and change are visible" + - "Confirm market cap and trading volume are displayed" + - "Ensure financial metrics and charts are shown" + +metadata: + tags: ["web-task", "finance", "stocks", "yahoo-finance", "investment", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-flight-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-flight-001.yaml new file mode 100644 index 0000000..a17883f --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-flight-001.yaml @@ -0,0 +1,45 @@ +# Complex Flight Search - Web Task Agent +id: "web-task-agent-flight-001" +name: "Complex Flight Search" +description: "Test web task agent handling complex flight search with multiple criteria" +enabled: true + +target: + url: "https://www.kayak.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for round-trip flights from Seattle (SEA) to Tokyo (NRT) departing March 20, returning March 30" + reasoning: "Customer is looking for finding the best flight options" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + 
temperature: 0.3 + criteria: + - "Successfully found round-trip flights from Seattle (SEA) to Tokyo (NRT)" + - "Flight results show March 20 departure date" + - "Flight results show March 30 return date" + - "Returned multiple flight options with airlines and prices" + - "Each flight includes essential details (times, airlines, prices)" + - "Results clearly distinguish between outbound and return flights" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify flight search results are displayed" + - "Check SEA to NRT route is correctly selected" + - "Confirm dates March 20 departure and March 30 return" + - "Ensure flight options are showing with prices and airlines" + +metadata: + tags: ["web-task", "flight", "travel", "multi-step", "kayak", "round-trip"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-food-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-food-001.yaml new file mode 100644 index 0000000..32ee646 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-food-001.yaml @@ -0,0 +1,68 @@ +# Restaurant Search and Menu Extraction - Web Task Agent +id: "web-task-agent-food-001" +name: "Restaurant Search and Menu Extraction" +description: "Test searching restaurants and extracting menu information" +enabled: true + +target: + url: "https://www.yelp.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for \"Italian restaurants near me\" in San Francisco and extract restaurant details" + reasoning: "Users want to quickly compare restaurants, menus, and reviews" + extraction_schema: + type: "object" + properties: + restaurants: + type: "array" + items: + type: "object" + properties: + name: + type: "string" + rating: + type: "string" + price_range: + type: "string" + cuisine: + type: 
"string" + address: + type: "string" + phone: + type: "string" + hours: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully found Italian restaurants in San Francisco" + - "Each restaurant includes name, rating, and price range" + - "Location/address information is provided for each restaurant" + - "Contact details (phone/hours) included when available" + - "All restaurants listed serve Italian cuisine" + - "Results are presented in clear, structured text format (not JSON)" + - "Restaurants are numbered or organized clearly for easy comparison" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Yelp search results for Italian restaurants" + - "Check that restaurants show ratings and price ranges" + - "Confirm location filter shows San Francisco results" + - "Ensure restaurant listings include contact information" + +metadata: + tags: ["web-task", "restaurants", "yelp", "food", "local-search", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-iframe-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-iframe-001.yaml new file mode 100644 index 0000000..30b0eac --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-iframe-001.yaml @@ -0,0 +1,83 @@ +# ANA Airlines Iframe Content Extraction - Web Task Agent +id: "web-task-agent-iframe-001" +name: "ANA Airlines Iframe Content Extraction" +description: "Test web task agent handling iframe-heavy airline booking sites like ANA Airlines" +enabled: true + +target: + url: 
"https://aswbe.ana.co.jp/webapps/reservation/flight-search?CONNECTION_KIND=SEA&LANG=en&hiddenSearchMode=ROUND_TRIP&departureDate:field=20260320&returnDate:field=20260330&departureAirportCode:field=SEA&arrivalAirportCode:field=NRT&adultCount=1&youngAdultCount=0&childCount=0&infantCount=0&boardingClass=INTY001&searchFlag=1" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Navigate the ANA Airlines flight search page and extract available flight options from Seattle (SEA) to Tokyo Narita (NRT) for March 20-30, 2026. Handle any iframe content and booking interface elements." + reasoning: "Testing iframe content extraction and complex airline booking site navigation" + extraction_schema: + type: "object" + properties: + flights: + type: "array" + items: + type: "object" + properties: + flight_number: + type: "string" + airline: + type: "string" + departure_time: + type: "string" + arrival_time: + type: "string" + departure_date: + type: "string" + arrival_date: + type: "string" + duration: + type: "string" + aircraft: + type: "string" + price: + type: "string" + cabin_class: + type: "string" + stops: + type: "string" + booking_interface_status: + type: "string" + iframe_content_found: + type: "boolean" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully navigated ANA Airlines booking interface" + - "Handled iframe content correctly (iframe_content_found should be true if iframes detected)" + - "Extracted flight information from ANA flight search results" + - "Flight details include ANA flight numbers and accurate route (SEA to NRT)" + - "Extracted pricing information in appropriate currency" + - "Handled any booking interface elements, popups, or navigation flows" + - "Results show flights for the correct dates (March 20-30, 2026)" + - "Successfully demonstrated iframe content extraction capabilities" + - "Booking interface status indicates 
successful page interaction" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify ANA Airlines flight search page loaded correctly" + - "Check that search parameters show SEA to NRT route" + - "Confirm flight results are displayed (may be in iframes)" + - "Ensure booking interface elements are functional" + - "Verify flight information is accessible and extractable" + +metadata: + tags: ["web-task", "iframe", "ana-airlines", "complex-booking", "international-flight", "airline-specific"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-jobs-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-jobs-001.yaml new file mode 100644 index 0000000..2c72df3 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-jobs-001.yaml @@ -0,0 +1,68 @@ +# Job Search Workflow - Web Task Agent +id: "web-task-agent-jobs-001" +name: "Job Search Workflow" +description: "Test web task agent orchestrating job search on LinkedIn" +enabled: true + +target: + url: "https://www.linkedin.com/jobs" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for \"Software Engineer\" jobs in \"San Francisco\" and extract details for the first 5 results" + reasoning: "User wants to find job opportunities in tech industry" + extraction_schema: + type: "object" + properties: + jobs: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + company: + type: "string" + location: + type: "string" + salary: + type: "string" + description: + type: "string" + url: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Either used construct_direct_url for LinkedIn job search OR used traditional form interaction" + - "If using direct URL: constructed proper 
LinkedIn job search URL with keywords and location" + - "If using forms: delegated keyword and location input to action_agent" + - "Extracted job listings using extract_data" + - "Returned structured job data in readable text format (not JSON)" + - "Each job listing includes title, company, location, and other relevant fields" + - "Results are numbered or organized clearly for easy reading" + - "Demonstrated proper workflow orchestration for job search" + - "Never used direct browser interaction tools" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify LinkedIn job search results are displayed" + - "Check that search shows Software Engineer jobs in San Francisco" + - "Confirm job listings include company names and titles" + - "Ensure at least 5 job results are visible" + +metadata: + tags: ["web-task", "jobs", "linkedin", "search", "career", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-learning-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-learning-001.yaml new file mode 100644 index 0000000..8dcdc7d --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-learning-001.yaml @@ -0,0 +1,69 @@ +# Online Course Search - Web Task Agent +id: "web-task-agent-learning-001" +name: "Online Course Search" +description: "Test searching and extracting course information from learning platforms" +enabled: true + +target: + url: "https://www.coursera.org" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for \"Machine Learning\" courses and extract details for top 5 results" + reasoning: "Users want to compare courses across platforms for learning decisions" + extraction_schema: + type: "object" + properties: + courses: + type: "array" + items: + type: "object" + properties: + title: + type: 
"string" + instructor: + type: "string" + university: + type: "string" + rating: + type: "string" + duration: + type: "string" + price: + type: "string" + description: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully found Machine Learning courses on Coursera" + - "Returned details for top 5 courses as requested" + - "Each course includes title, instructor, university, and rating" + - "Duration and pricing information included for each course" + - "Course descriptions or key topics are provided" + - "Results are presented in structured text format (not JSON)" + - "Courses are numbered (1-5) and well-organized for easy comparison" + - "Each course entry is clearly formatted and readable" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Coursera search results for Machine Learning" + - "Check that courses show titles, instructors, and ratings" + - "Confirm course details include duration and pricing" + - "Ensure search results are relevant to Machine Learning" + +metadata: + tags: ["web-task", "education", "coursera", "courses", "learning", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-nav-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-nav-001.yaml new file mode 100644 index 0000000..fdee2f4 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-nav-001.yaml @@ -0,0 +1,46 @@ +# Site Navigation Workflow - Web Task Agent +id: "web-task-agent-nav-001" +name: "Site Navigation Workflow" +description: "Test web task agent orchestrating navigation between different sections of a site" +enabled: true + +target: + url: "https://www.wikipedia.org" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 90000 + +input: + task: "Navigate 
to the Wikipedia homepage, search for \"artificial intelligence\", and find information about machine learning" + reasoning: "User is looking to explore Wikipedia content through structured navigation" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Orchestrated Wikipedia search via action_agent calls" + - "Navigated to artificial intelligence article through action_agent" + - "Located machine learning section via action_agent coordination" + - "Extracted relevant information about machine learning" + - "Demonstrated multi-step navigation workflow" + - "Maintained orchestration pattern throughout navigation" + - "Provided structured summary of found information" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify navigation reached artificial intelligence Wikipedia page" + - "Check that machine learning section or content is visible" + - "Confirm successful navigation through multiple page sections" + - "Ensure content related to machine learning is displayed" + +metadata: + tags: ["web-task", "navigation", "multi-step", "wikipedia", "content-exploration"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-news-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-news-001.yaml new file mode 100644 index 0000000..d9e1934 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-news-001.yaml @@ -0,0 +1,64 @@ +# News Article Aggregation - Web Task Agent +id: "web-task-agent-news-001" +name: "News Article Aggregation" +description: "Test aggregating news headlines and summaries from news sites" +enabled: true + +target: + url: "https://news.ycombinator.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Extract the top 10 Hacker News stories with 
titles, scores, and first few comments" + reasoning: "Users want automated news monitoring for research and awareness" + extraction_schema: + type: "object" + properties: + articles: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + score: + type: "number" + comments_count: + type: "number" + url: + type: "string" + top_comment: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully extracted 10 Hacker News stories as requested" + - "Each story includes title, score, and comment count" + - "URLs are provided for each story" + - "Stories appear to be from the current top/front page" + - "Results are presented in clear, numbered text format (1-10), not JSON" + - "All required fields are present and properly formatted in readable text" + - "Each story is clearly separated and easy to read" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Hacker News stories are visible with scores" + - "Check that story titles and comment counts are shown" + - "Confirm top stories section is properly displayed" + - "Ensure story metadata is accessible for extraction" + +metadata: + tags: ["web-task", "news", "hackernews", "aggregation", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-realestate-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-realestate-001.yaml new file mode 100644 index 0000000..f22bc13 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-realestate-001.yaml @@ -0,0 +1,70 @@ +# Real Estate Property Search - Web Task Agent +id: "web-task-agent-realestate-001" +name: "Real Estate Property Search" +description: "Test property search workflow on real estate platforms" +enabled: true + +target: + url: "https://www.zillow.com" + 
wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for houses for sale in Austin, Texas under $500k and extract property details" + reasoning: "User wants to find affordable housing options in a specific location" + extraction_schema: + type: "object" + properties: + properties: + type: "array" + items: + type: "object" + properties: + address: + type: "string" + price: + type: "string" + bedrooms: + type: "number" + bathrooms: + type: "number" + sqft: + type: "string" + lot_size: + type: "string" + year_built: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Orchestrated location search via action_agent" + - "Delegated price filter setting to action_agent" + - "Coordinated property type selection through action_agent" + - "Applied search filters through proper action_agent calls" + - "Extracted property listings with extract_data" + - "Returned structured property data in readable text format (not JSON)" + - "Each property includes address, price, bedrooms, bathrooms, and other key details" + - "Properties are clearly numbered or organized for easy comparison" + - "Demonstrated complex real estate search workflow orchestration" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Zillow search results for Austin, Texas properties" + - "Check that properties shown are under $500k" + - "Confirm property listings show price, beds, baths info" + - "Ensure search results match the specified criteria" + +metadata: + tags: ["web-task", "real-estate", "zillow", "property-search", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-001.yaml new file mode 100644 index 
0000000..6fd0f6e --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-001.yaml @@ -0,0 +1,61 @@ +# Infinite Scroll Content Loading - Web Task Agent +id: "web-task-agent-scroll-001" +name: "Infinite Scroll Content Loading" +description: "Test web task agent handling infinite scroll pages to load more content" +enabled: true + +target: + url: "https://twitter.com" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Scroll down the Twitter feed to load at least 20 tweets and extract their content" + reasoning: "Testing infinite scroll functionality for dynamic content loading" + extraction_schema: + type: "object" + properties: + tweets: + type: "array" + items: + type: "object" + properties: + author: + type: "string" + content: + type: "string" + likes: + type: "string" + retweets: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully used scroll_page tool to scroll down the page" + - "Loaded additional content through scrolling actions" + - "Extracted at least 20 tweets from the feed" + - "Each tweet includes author and content information" + - "Demonstrated proper handling of dynamically loaded content" + - "Results are presented in clear, numbered text format" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify initial Twitter feed is loaded" + - "Check that scrolling action loaded additional tweets" + - "Confirm at least 20 tweets are visible after scrolling" + - "Ensure page scrolled down significantly from initial position" + +metadata: + tags: ["web-task", "scrolling", "infinite-scroll", "dynamic-content", "twitter"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-002.yaml 
b/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-002.yaml new file mode 100644 index 0000000..d5d060a --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-002.yaml @@ -0,0 +1,65 @@ +# Product Review Scrolling - Web Task Agent +id: "web-task-agent-scroll-002" +name: "Product Review Scrolling" +description: "Test scrolling to load more product reviews on e-commerce sites" +enabled: true + +target: + url: "https://www.amazon.com/dp/B08N5WRWNW" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Scroll down to the reviews section and load more reviews by scrolling, then extract review details" + reasoning: "Users need to see multiple reviews beyond initial visible ones" + extraction_schema: + type: "object" + properties: + reviews: + type: "array" + items: + type: "object" + properties: + rating: + type: "string" + title: + type: "string" + author: + type: "string" + date: + type: "string" + verified: + type: "boolean" + content: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Used scroll_page tool to navigate to reviews section" + - "Scrolled within reviews area to load additional reviews" + - "Extracted multiple product reviews with ratings" + - "Each review includes rating, author, and content" + - "Successfully handled lazy-loaded review content" + - "Presented reviews in structured, readable format" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Amazon product page is loaded" + - "Check that page scrolled to reviews section" + - "Confirm additional reviews loaded after scrolling" + - "Ensure review content is fully visible" + +metadata: + tags: ["web-task", "scrolling", "reviews", "amazon", "e-commerce"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git 
a/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-003.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-003.yaml new file mode 100644 index 0000000..f435017 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-003.yaml @@ -0,0 +1,61 @@ +# News Article Progressive Loading - Web Task Agent +id: "web-task-agent-scroll-003" +name: "News Article Progressive Loading" +description: "Test scrolling through news sites that load articles progressively" +enabled: true + +target: + url: "https://medium.com/topic/technology" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Scroll down to load more technology articles and extract titles and authors for at least 15 articles" + reasoning: "Testing progressive content loading on news/blog platforms" + extraction_schema: + type: "object" + properties: + articles: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + author: + type: "string" + reading_time: + type: "string" + preview: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Used scroll_page tool multiple times to load content" + - "Successfully loaded at least 15 articles through scrolling" + - "Extracted article titles and author information" + - "Handled Medium's progressive loading mechanism" + - "Articles are from technology topic as requested" + - "Results presented in clear, numbered format" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Medium technology page is loaded" + - "Check that initial articles are visible" + - "Confirm scrolling loaded additional articles" + - "Ensure at least 15 articles are visible after scrolling" + +metadata: + tags: ["web-task", "scrolling", "progressive-loading", "medium", "articles"] + priority: "high" + owner: 
"devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-004.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-004.yaml new file mode 100644 index 0000000..5970947 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-004.yaml @@ -0,0 +1,61 @@ +# Search Results Infinite Scroll - Web Task Agent +id: "web-task-agent-scroll-004" +name: "Search Results Infinite Scroll" +description: "Test handling search results that use infinite scroll instead of pagination" +enabled: true + +target: + url: "https://www.pinterest.com/search/pins/?q=web%20design" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Search for \"web design\" pins and scroll to load at least 30 results, then extract pin details" + reasoning: "Testing infinite scroll on visual search platforms" + extraction_schema: + type: "object" + properties: + pins: + type: "array" + items: + type: "object" + properties: + title: + type: "string" + description: + type: "string" + saves: + type: "string" + source: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully performed search for \"web design\" pins" + - "Used scroll_page tool to trigger infinite scroll loading" + - "Loaded at least 30 pins through scrolling actions" + - "Extracted pin titles and metadata" + - "Handled Pinterest's masonry layout and lazy loading" + - "Results are well-organized and readable" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Pinterest search results for web design" + - "Check initial pins are displayed" + - "Confirm scrolling loaded many more pins" + - "Ensure grid layout shows 30+ pins after scrolling" + +metadata: + tags: ["web-task", "scrolling", "infinite-scroll", "pinterest", "visual-search"] + 
priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-005.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-005.yaml new file mode 100644 index 0000000..e603ff7 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-scroll-005.yaml @@ -0,0 +1,73 @@ +# Google Flights Scroll and Show More - Web Task Agent +id: "web-task-agent-scroll-005" +name: "Google Flights Scroll and Show More" +description: "Test scrolling and clicking \"Show more flights\" button on Google Flights to load additional flight options" +enabled: true + +target: + url: "https://www.google.com/travel/flights?sca_esv=646eedf97dcc8cf2&source=flun&uitype=cuAA&hl=en&gl=us&curr=USD&tfs=CAEQAhoeEgoyMDI2LTAzLTIwagcIARIDU0VBcgcIARIDTlJUGh4SCjIwMjYtMDMtMzBqBwgBEgNOUlRyBwgBEgNTRUF6aENqUklhVFJJTVVwVlZVOXpNakJCUTJodGVFRkNSeTB0TFMwdExTMHRjR3BpYjI4eE0wRkJRVUZCUjJoc1lsWlZRV2RYUlZsQkVnTmpTMFVhQ3dqUXNnVVFBaG9EVlZORU9EQncwTElG" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Extract the initial flight results, then scroll down and click \"Show more flights\" button to load additional flights. Extract at least 20 total flight options from Seattle to Tokyo." 
+ reasoning: "Testing combination of scrolling and button clicking to load more flight results on Google Flights" + extraction_schema: + type: "object" + properties: + flights: + type: "array" + items: + type: "object" + properties: + airline: + type: "string" + departure_time: + type: "string" + arrival_time: + type: "string" + duration: + type: "string" + stops: + type: "string" + price: + type: "string" + aircraft: + type: "string" + total_flights_found: + type: "number" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully extracted initial flight results from Google Flights" + - "Used scroll_page tool to scroll down the flight results list" + - "Located and clicked \"Show more flights\" button using action_agent" + - "Loaded additional flight options beyond the initial set" + - "Extracted at least 20 total flights from Seattle (SEA) to Tokyo (NRT)" + - "Each flight includes airline, times, duration, stops, and price" + - "Flights are for the correct dates (March 20-30, 2026)" + - "Results are presented in clear, numbered format" + - "Successfully combined scrolling and clicking actions to load more content" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Google Flights page shows SEA to NRT flights" + - "Check that initial flight results are displayed" + - "Confirm scrolling occurred and \"Show more flights\" button was visible" + - "Ensure additional flights loaded after clicking the button" + - "Verify at least 20 flight options are now visible" + +metadata: + tags: ["web-task", "scrolling", "google-flights", "click-action", "load-more", "travel"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-search-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-search-001.yaml new file mode 100644 index 
0000000..50dc920 --- /dev/null +++ b/eval-server/nodejs/evals/web-task-agent/web-task-agent-search-001.yaml @@ -0,0 +1,41 @@ +# Basic web task search evaluation (matches DevTools test case) +id: "web-task-agent-search-001" +name: "Site-Specific Search Task" +description: "Test web task agent orchestrating a search workflow on a specific site" +enabled: true + +target: + url: "chrome://new-tab-page" + +tool: "web_task_agent" +timeout: 60000 + +input: + task: "Search Google for \"Chrome DevTools automation\" and extract the top 3 search results" + reasoning: "Testing basic site-specific search workflow orchestration" + context: "Need to demonstrate web_task_agent can coordinate multiple action_agent calls for a complete search workflow" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o" + criteria: + - "Successfully returned exactly 3 search results in structured text format" + - "Each result is numbered (1., 2., 3.) and contains a title related to \"Chrome DevTools automation\"" + - "Each result includes a URL in the format \"URL: [link]\"" + - "Results are presented in a clear, readable text format (not JSON)" + - "Response includes a brief summary or conclusion statement" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify search was completed and results page is showing" + - "Check that search results are related to \"Chrome DevTools automation\"" + - "Confirm at least 3 search results are visible on the page" + - "Ensure the search workflow was completed successfully" + +metadata: + tags: ["web-task", "orchestration", "search", "workflow", "google", "basic"] + priority: "normal" \ No newline at end of file diff --git a/eval-server/nodejs/evals/web-task-agent/web-task-agent-social-001.yaml b/eval-server/nodejs/evals/web-task-agent/web-task-agent-social-001.yaml new file mode 100644 index 0000000..f1f969e --- /dev/null +++ 
b/eval-server/nodejs/evals/web-task-agent/web-task-agent-social-001.yaml @@ -0,0 +1,60 @@ +# Social Media Content Extraction - Web Task Agent +id: "web-task-agent-social-001" +name: "Social Media Content Extraction" +description: "Test extracting trending topics and posts from social media" +enabled: true + +target: + url: "https://twitter.com/explore" + wait_for: "networkidle" + wait_timeout: 5000 + +tool: "web_task_agent" +timeout: 180000 + +input: + task: "Extract the top 5 trending topics from Twitter/X explore page" + reasoning: "User wants to stay updated on current trends" + extraction_schema: + type: "object" + properties: + trends: + type: "array" + items: + type: "object" + properties: + topic: + type: "string" + posts_count: + type: "string" + category: + type: "string" + + +validation: + type: "llm_judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Successfully accessed Twitter/X explore page and found trending topics" + - "Returned exactly 5 trending topics as requested" + - "Each topic includes the trend name/hashtag" + - "Post counts or metrics are included when available" + - "Topics are current/recent trends (not outdated)" + - "Results are presented in clear, numbered text format (not JSON)" + - "Each trend is properly numbered (1., 2., 3., etc.) 
for readability" + visual_verification: + enabled: true + capture_before_action: true + capture_after_action: true + verification_prompts: + - "Verify Twitter/X explore page is loaded" + - "Check that trending topics section is visible" + - "Confirm trending topics show names and post counts" + - "Ensure page shows current trending content" + +metadata: + tags: ["web-task", "social-media", "twitter", "trends", "extraction", "popular"] + priority: "high" + owner: "devtools-team" \ No newline at end of file diff --git a/eval-server/nodejs/examples/clients/1233ae25-9f9e-4f77-924d-865f7d615cef.yaml b/eval-server/nodejs/examples/clients/1233ae25-9f9e-4f77-924d-865f7d615cef.yaml new file mode 100644 index 0000000..f5b865f --- /dev/null +++ b/eval-server/nodejs/examples/clients/1233ae25-9f9e-4f77-924d-865f7d615cef.yaml @@ -0,0 +1,12 @@ +client: + id: 1233ae25-9f9e-4f77-924d-865f7d615cef + name: DevTools Client 1233ae25 + secret_key: hello + description: Auto-generated DevTools evaluation client +settings: + max_concurrent_evaluations: 3 + default_timeout: 45000 + retry_policy: + max_retries: 2 + backoff_multiplier: 2 + initial_delay: 1000 diff --git a/eval-server/nodejs/examples/library-usage.js b/eval-server/nodejs/examples/library-usage.js new file mode 100644 index 0000000..cfb3ffd --- /dev/null +++ b/eval-server/nodejs/examples/library-usage.js @@ -0,0 +1,250 @@ +#!/usr/bin/env node + +// Copyright 2025 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +// Simple example demonstrating the programmatic API usage + +import { EvalServer } from '../src/lib/EvalServer.js'; +import { CONFIG } from '../src/config.js'; + +console.log('๐Ÿ”ง Creating server...'); +const server = new EvalServer({ + authKey: 'hello', + host: '127.0.0.1', + port: 8080 +}); + +console.log('๐Ÿ”ง Setting up event handlers...'); + +server.on('started', (info) => { + console.log('โœ… Server started event fired:', info); +}); + +server.on('error', (error) => { + console.log('โŒ Server error:', error); +}); + +server.onConnect(async client => { + console.log('๐ŸŽ‰ CLIENT CONNECTED!'); + console.log(' - Client ID:', client.id); + console.log(' - Client tabId:', client.tabId); + console.log(' - Client info:', client.getInfo()); + + // Check available LLM providers + console.log('\n๐Ÿ”‘ Available LLM Providers:'); + const availableProviders = []; + if (CONFIG.providers.openai.apiKey) { + availableProviders.push('openai'); + console.log(' โœ… OpenAI configured'); + } + if (CONFIG.providers.groq.apiKey) { + availableProviders.push('groq'); + console.log(' โœ… Groq configured'); + } + if (CONFIG.providers.openrouter.apiKey) { + availableProviders.push('openrouter'); + console.log(' โœ… OpenRouter configured'); + } + if (CONFIG.providers.litellm.apiKey && CONFIG.providers.litellm.endpoint) { + availableProviders.push('litellm'); + console.log(' โœ… LiteLLM configured'); + } + + if (availableProviders.length === 0) { + console.log(' โŒ No providers configured. Add API keys to .env file.'); + console.log(' โ„น๏ธ Example: OPENAI_API_KEY=sk-your-key-here'); + } + + try { + // Demonstrate basic evaluation first + console.log('\n๐Ÿ”„ Starting basic evaluation...'); + let response = await client.evaluate({ + id: "basic_eval", + name: "Capital of France", + description: "Basic test evaluation", + tool: "chat", + input: { + message: "What is the capital of France?" 
+ } + }); + + console.log('โœ… Basic evaluation completed!'); + console.log('๐Ÿ“Š Response:', JSON.stringify(response, null, 2)); + + // Demonstrate explicit model selection if OpenAI is available + if (CONFIG.providers.openai.apiKey) { + await demonstrateModelSelection(client); + } + + // Demonstrate LLM configuration if providers are available + if (availableProviders.length > 0) { + await demonstrateLLMConfiguration(client, availableProviders); + } + + } catch (error) { + console.log('โŒ Evaluation failed:', error.message); + } +}); + +server.onDisconnect(clientInfo => { + console.log('๐Ÿ‘‹ CLIENT DISCONNECTED:', clientInfo); +}); + +// Function to demonstrate explicit model selection within OpenAI +async function demonstrateModelSelection(client) { + console.log('\n๐Ÿค– Demonstrating Model Selection (OpenAI)...'); + + const modelTests = [ + { + model: 'gpt-4', + task: 'Complex reasoning', + message: 'Solve this step by step: If a train travels 60 mph for 2.5 hours, how far does it go?' + }, + { + model: 'gpt-4o-mini', + task: 'Simple question', + message: 'What is 2 + 2?' + }, + { + model: 'gpt-3.5-turbo', + task: 'Creative writing', + message: 'Write a one-sentence story about a cat.' 
+ } + ]; + + for (const test of modelTests) { + console.log(`\n๐Ÿ”ง Testing ${test.model} for ${test.task}...`); + + try { + const response = await client.evaluate({ + id: `model_test_${test.model.replace(/[^a-z0-9]/g, '_')}`, + name: `${test.model} ${test.task}`, + tool: "chat", + input: { + message: test.message + }, + model: { + main_model: { + provider: "openai", + model: test.model, + api_key: CONFIG.providers.openai.apiKey + } + } + }); + + console.log(` โœ… ${test.model} completed successfully`); + console.log(` ๐Ÿ“Š Response: ${JSON.stringify(response.output).substring(0, 100)}...`); + + // Wait between tests + await new Promise(resolve => setTimeout(resolve, 1500)); + + } catch (error) { + console.log(` โŒ ${test.model} failed: ${error.message}`); + } + } + + console.log('\nโœจ Model selection demonstration completed!'); +} + +// Function to demonstrate LLM configuration +async function demonstrateLLMConfiguration(client, availableProviders) { + console.log('\n๐Ÿงช Demonstrating LLM Configuration...'); + + for (const provider of availableProviders.slice(0, 2)) { // Test up to 2 providers + console.log(`\n๐Ÿ”ง Configuring ${provider.toUpperCase()} provider...`); + + try { + // Configure different models based on provider + let models; + switch (provider) { + case 'openai': + models = { + main: 'gpt-4', + mini: 'gpt-4o-mini', + nano: 'gpt-3.5-turbo' + }; + break; + case 'groq': + models = { + main: 'llama-3.1-8b-instant', + mini: 'llama-3.1-8b-instant', + nano: 'llama-3.1-8b-instant' + }; + break; + case 'openrouter': + models = { + main: 'anthropic/claude-3-sonnet', + mini: 'anthropic/claude-3-haiku', + nano: 'anthropic/claude-3-haiku' + }; + break; + case 'litellm': + models = { + main: 'claude-3-sonnet-20240229', + mini: 'claude-3-haiku-20240307', + nano: 'claude-3-haiku-20240307' + }; + break; + } + + console.log(` ๐Ÿ“ฆ Models: main=${models.main}, mini=${models.mini}, nano=${models.nano}`); + + // Run evaluation with specific provider configuration + 
const response = await client.evaluate({ + id: `${provider}_config_eval`, + name: `${provider.toUpperCase()} Configuration Test`, + description: `Test evaluation using ${provider} provider`, + tool: "chat", + input: { + message: `Hello! This is a test using the ${provider} provider. Please respond with a brief confirmation.` + }, + model: { + main_model: { + provider: provider, + model: models.main, + api_key: CONFIG.providers[provider].apiKey, + endpoint: CONFIG.providers[provider].endpoint + }, + mini_model: { + provider: provider, + model: models.mini, + api_key: CONFIG.providers[provider].apiKey, + endpoint: CONFIG.providers[provider].endpoint + }, + nano_model: { + provider: provider, + model: models.nano, + api_key: CONFIG.providers[provider].apiKey, + endpoint: CONFIG.providers[provider].endpoint + } + } + }); + + console.log(` โœ… ${provider.toUpperCase()} evaluation completed successfully`); + console.log(` ๐Ÿ“Š Response preview: ${JSON.stringify(response.output).substring(0, 100)}...`); + + // Wait between provider tests + await new Promise(resolve => setTimeout(resolve, 2000)); + + } catch (error) { + console.log(` โŒ ${provider.toUpperCase()} configuration test failed:`, error.message); + } + } + + console.log('\nโœจ LLM configuration demonstration completed!'); +} + +console.log('๐Ÿ”ง Starting server...'); +await server.start(); +console.log('โœ… Server started successfully on ws://127.0.0.1:8080'); +console.log('โณ Waiting for DevTools client to connect...'); +console.log(' WebSocket URL: ws://127.0.0.1:8080'); +console.log(' Auth Key: hello'); + +// Add periodic status check +setInterval(() => { + const status = server.getStatus(); + console.log(`๐Ÿ“Š Status: ${status.connectedClients} clients, ${status.readyClients} ready`); +}, 10000); \ No newline at end of file diff --git a/eval-server/nodejs/examples/logs/.gitignore b/eval-server/nodejs/examples/logs/.gitignore new file mode 100644 index 0000000..9309608 --- /dev/null +++ 
b/eval-server/nodejs/examples/logs/.gitignore @@ -0,0 +1,3 @@ +combined.log +error.log +evaluations.jsonl \ No newline at end of file diff --git a/eval-server/nodejs/examples/multiple-evals.js b/eval-server/nodejs/examples/multiple-evals.js new file mode 100755 index 0000000..b65522f --- /dev/null +++ b/eval-server/nodejs/examples/multiple-evals.js @@ -0,0 +1,167 @@ +#!/usr/bin/env node + +// Copyright 2025 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Example demonstrating multiple evaluations using a stack-based approach +// Each connecting client receives a different evaluation from the stack + +import { EvalServer } from '../src/lib/EvalServer.js'; +import { EvaluationStack } from '../src/lib/EvaluationStack.js'; +import { CONFIG } from '../src/config.js'; + +console.log('๐Ÿ”ง Creating evaluation stack...'); +const evalStack = new EvaluationStack(); + +// Create multiple diverse evaluations for the stack with different LLM configurations +const evaluations = [ + { + id: "math_eval", + name: "Basic Math Problem", + description: "Simple arithmetic evaluation", + tool: "chat", + input: { + message: "What is 15 * 7 + 23? Please show your calculation steps." + }, + // Use OpenAI if available, otherwise default + model: CONFIG.providers.openai.apiKey ? { + main_model: { + provider: 'openai', + model: 'gpt-4', + api_key: CONFIG.providers.openai.apiKey + } + } : {} + }, + { + id: "geography_eval", + name: "Capital of France", + description: "Geography knowledge test", + tool: "chat", + input: { + message: "What is the capital of France?" + }, + // Use Groq if available, otherwise default + model: CONFIG.providers.groq.apiKey ? 
{ + main_model: { + provider: 'groq', + model: 'llama-3.1-8b-instant', + api_key: CONFIG.providers.groq.apiKey + } + } : {} + }, + { + id: "creative_eval", + name: "Creative Writing", + description: "Short creative writing task", + tool: "chat", + input: { + message: "Write a two-sentence story about a robot discovering friendship." + }, + // Use OpenRouter if available, otherwise default + model: CONFIG.providers.openrouter.apiKey ? { + main_model: { + provider: 'openrouter', + model: 'anthropic/claude-3-sonnet', + api_key: CONFIG.providers.openrouter.apiKey + } + } : {} + }, + { + id: "tech_eval", + name: "Technology Knowledge", + description: "Basic technology concepts", + tool: "chat", + input: { + message: "Explain what HTTP stands for and what it's used for in simple terms." + }, + // Use LiteLLM if available, otherwise default + model: (CONFIG.providers.litellm.apiKey && CONFIG.providers.litellm.endpoint) ? { + main_model: { + provider: 'litellm', + model: 'claude-3-haiku-20240307', + api_key: CONFIG.providers.litellm.apiKey, + endpoint: CONFIG.providers.litellm.endpoint + } + } : {} + } +]; + +// Push evaluations to stack (they will be popped in reverse order) +console.log('๐Ÿ“š Adding evaluations to stack...'); +evaluations.forEach((evaluation, index) => { + evalStack.push(evaluation); + const providerInfo = evaluation.model?.main_model?.provider ? ` [${evaluation.model.main_model.provider}]` : ' [default]'; + console.log(` ${index + 1}. 
${evaluation.name} (${evaluation.id})${providerInfo}`); +}); + +console.log(`โœ… Stack initialized with ${evalStack.size()} evaluations`); + +console.log('๐Ÿ”ง Creating server...'); +const server = new EvalServer({ + authKey: 'hello', + host: '127.0.0.1', + port: 8080 +}); + +console.log('๐Ÿ”ง Setting up event handlers...'); + +server.on('started', (info) => { + console.log('โœ… Server started event fired:', info); +}); + +server.on('error', (error) => { + console.log('โŒ Server error:', error); +}); + +server.onConnect(async client => { + console.log('๐ŸŽ‰ CLIENT CONNECTED!'); + console.log(' - Client ID:', client.id); + console.log(' - Client tabId:', client.tabId); + console.log(' - Client info:', client.getInfo()); + + // Check if we have evaluations left in the stack + if (evalStack.isEmpty()) { + console.log('โš ๏ธ No more evaluations in stack for this client'); + console.log(' Consider refilling the stack or handling this scenario'); + return; + } + + // Pop the next evaluation from the stack + const evaluation = evalStack.pop(); + const providerInfo = evaluation.model?.main_model?.provider ? 
` using ${evaluation.model.main_model.provider}` : ' using default provider'; + console.log(`๐Ÿ“‹ Assigning evaluation: "${evaluation.name}" (${evaluation.id})${providerInfo}`); + console.log(`๐Ÿ“Š Remaining evaluations in stack: ${evalStack.size()}`); + + try { + console.log('๐Ÿ”„ Starting evaluation...'); + if (evaluation.model?.main_model?.provider) { + console.log(`๐Ÿ”ง Using LLM provider: ${evaluation.model.main_model.provider} with model: ${evaluation.model.main_model.model}`); + } + + let response = await client.evaluate(evaluation); + + console.log('โœ… Evaluation completed!'); + console.log(`๐Ÿ“Š Response for "${evaluation.name}":`, JSON.stringify(response, null, 2)); + } catch (error) { + console.log(`โŒ Evaluation "${evaluation.name}" failed:`, error.message); + } +}); + +server.onDisconnect(clientInfo => { + console.log('๐Ÿ‘‹ CLIENT DISCONNECTED:', clientInfo); +}); + +console.log('๐Ÿ”ง Starting server...'); +await server.start(); +console.log('โœ… Server started successfully on ws://127.0.0.1:8080'); +console.log('โณ Waiting for DevTools clients to connect...'); +console.log(' WebSocket URL: ws://127.0.0.1:8080'); +console.log(' Auth Key: hello'); +console.log(`๐Ÿ“š Stack contains ${evalStack.size()} evaluations ready to be distributed`); + +// Add periodic status check +setInterval(() => { + const status = server.getStatus(); + console.log(`๐Ÿ“Š Status: ${status.connectedClients} clients, ${status.readyClients} ready, ${evalStack.size()} evals remaining`); +}, 10000); \ No newline at end of file diff --git a/eval-server/nodejs/examples/with-http-wrapper.js b/eval-server/nodejs/examples/with-http-wrapper.js new file mode 100644 index 0000000..2ec9d0f --- /dev/null +++ b/eval-server/nodejs/examples/with-http-wrapper.js @@ -0,0 +1,45 @@ +#!/usr/bin/env node + +// Copyright 2025 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +// Example demonstrating how to use EvalServer with optional HTTP API wrapper + +import { EvalServer } from '../src/lib/EvalServer.js'; +import { HTTPWrapper } from '../src/lib/HTTPWrapper.js'; + +console.log('๐Ÿ”ง Creating EvalServer...'); +const evalServer = new EvalServer({ + // No authKey - authentication disabled for automated mode + host: '127.0.0.1', + port: 8082 +}); + +console.log('๐Ÿ”ง Creating HTTP wrapper...'); +const httpWrapper = new HTTPWrapper(evalServer, { + port: 8080, + host: '127.0.0.1' +}); + + +console.log('๐Ÿ”ง Starting EvalServer...'); +await evalServer.start(); +console.log('โœ… EvalServer started on ws://127.0.0.1:8082'); + +console.log('๐Ÿ”ง Starting HTTP wrapper...'); +await httpWrapper.start(); +console.log('โœ… HTTP API started on http://127.0.0.1:8080'); + +console.log('โณ Waiting for DevTools client to connect...'); +console.log(' WebSocket URL: ws://127.0.0.1:8082'); +console.log(' HTTP API URL: http://127.0.0.1:8080'); +console.log(' Auth: Disabled (automated mode)'); + +// Add periodic status check +setInterval(() => { + const evalServerStatus = evalServer.getStatus(); + const httpWrapperStatus = httpWrapper.getStatus(); + console.log(`๐Ÿ“Š EvalServer: ${evalServerStatus.connectedClients} clients, ${evalServerStatus.readyClients} ready`); + console.log(`๐Ÿ“Š HTTP API: ${httpWrapperStatus.isRunning ? 
'running' : 'stopped'} on ${httpWrapperStatus.url}`); +}, 15000); \ No newline at end of file diff --git a/eval-server/nodejs/logs/.gitignore b/eval-server/nodejs/logs/.gitignore new file mode 100644 index 0000000..326f777 --- /dev/null +++ b/eval-server/nodejs/logs/.gitignore @@ -0,0 +1,2 @@ +*.log +*.jsonl \ No newline at end of file diff --git a/eval-server/nodejs/package-lock.json b/eval-server/nodejs/package-lock.json new file mode 100644 index 0000000..99f3ff7 --- /dev/null +++ b/eval-server/nodejs/package-lock.json @@ -0,0 +1,832 @@ +{ + "name": "bo-eval-server", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "bo-eval-server", + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "dotenv": "^16.3.1", + "js-yaml": "^4.1.0", + "openai": "^4.24.1", + "uuid": "^9.0.1", + "winston": "^3.11.0", + "ws": "^8.16.0" + }, + "bin": { + "eval-server": "src/cli/index.js" + }, + "devDependencies": { + "@types/ws": "^8.5.10" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@colors/colors": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.6.0.tgz", + "integrity": "sha512-Ir+AOibqzrIsL6ajt3Rz3LskB7OiMVHqltZmspbW/TJuTVuyOMirVqAkjfY6JISiLHgyNqicAC8AyHHGzNd/dA==", + "license": "MIT", + "engines": { + "node": ">=0.1.90" + } + }, + "node_modules/@dabh/diagnostics": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@dabh/diagnostics/-/diagnostics-2.0.3.tgz", + "integrity": "sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==", + "license": "MIT", + "dependencies": { + "colorspace": "1.1.x", + "enabled": "2.0.x", + "kuler": "^2.0.0" + } + }, + "node_modules/@types/node": { + "version": "24.0.13", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.0.13.tgz", + "integrity": "sha512-Qm9OYVOFHFYg3wJoTSrz80hoec5Lia/dPp84do3X7dZvLikQvM1YpmvTBEdIr/e+U8HTkFjLHLnl78K/qjf+jQ==", + "license": "MIT", + 
"dependencies": { + "undici-types": "~7.8.0" + } + }, + "node_modules/@types/node-fetch": { + "version": "2.6.12", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.12.tgz", + "integrity": "sha512-8nneRWKCg3rMtF69nLQJnOYUcbafYeFSjqkw3jCRLsqkWFlHaoQrr5mXmofFGOx3DKn7UfmBMyov8ySvLRVldA==", + "license": "MIT", + "dependencies": { + "@types/node": "*", + "form-data": "^4.0.0" + } + }, + "node_modules/@types/triple-beam": { + "version": "1.3.5", + "resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.5.tgz", + "integrity": "sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw==", + "license": "MIT" + }, + "node_modules/@types/ws": { + "version": "8.18.1", + "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz", + "integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "license": "MIT", + "dependencies": { + "event-target-shim": "^5.0.0" + }, + "engines": { + "node": ">=6.5" + } + }, + "node_modules/agentkeepalive": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz", + "integrity": "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==", + "license": "MIT", + "dependencies": { + "humanize-ms": "^1.2.1" + }, + "engines": { + "node": ">= 8.0.0" + } + }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": 
"sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "license": "Python-2.0" + }, + "node_modules/async": { + "version": "3.2.6", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.6.tgz", + "integrity": "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==", + "license": "MIT" + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/color": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/color/-/color-3.2.1.tgz", + "integrity": "sha512-aBl7dZI9ENN6fUGC7mWpMTPNHmWUSNan9tuWN6ahh5ZLNk9baLJOnSMlrQkHcrfFgz2/RigjUVAjdx36VcemKA==", + "license": "MIT", + "dependencies": { + "color-convert": "^1.9.3", + "color-string": "^1.6.0" + } + }, + "node_modules/color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "license": "MIT", + "dependencies": { + "color-name": "1.1.3" + } + }, + "node_modules/color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==", + "license": "MIT" + 
}, + "node_modules/color-string": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz", + "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==", + "license": "MIT", + "dependencies": { + "color-name": "^1.0.0", + "simple-swizzle": "^0.2.2" + } + }, + "node_modules/colorspace": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/colorspace/-/colorspace-1.1.4.tgz", + "integrity": "sha512-BgvKJiuVu1igBUF2kEjRCZXol6wiiGbY5ipL/oVPwm0BL9sIpMIzM8IK7vwuxIIzOXMV3Ey5w+vxhm0rR/TN8w==", + "license": "MIT", + "dependencies": { + "color": "^3.1.3", + "text-hex": "1.0.x" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/dotenv": { + "version": "16.6.1", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.6.1.tgz", + "integrity": "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", 
+ "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/enabled": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/enabled/-/enabled-2.0.0.tgz", + "integrity": "sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ==", + "license": "MIT" + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/event-target-shim": { + "version": "5.0.1", + "resolved": 
"https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/fecha": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.3.tgz", + "integrity": "sha512-OP2IUU6HeYKJi3i0z4A19kHMQoLVs4Hc+DPqqxI2h/DPZHTm/vjsfC6P0b4jCMy14XizLBqvndQ+UilD7707Jw==", + "license": "MIT" + }, + "node_modules/fn.name": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz", + "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==", + "license": "MIT" + }, + "node_modules/form-data": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.3.tgz", + "integrity": "sha512-qsITQPfmvMOSAdeyZ+12I1c+CKSstAFAwu+97zrnWAbIr5u8wfsExUzCesVLC8NgHuRUqNN4Zy6UPWUTRGslcA==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/form-data-encoder": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==", + "license": "MIT" + }, + "node_modules/formdata-node": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "license": "MIT", + "dependencies": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + }, + "engines": { + "node": ">= 12.20" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + 
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + 
"engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/humanize-ms": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "license": "MIT", + "dependencies": { + "ms": "^2.0.0" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, + "node_modules/is-arrayish": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz", + "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==", + "license": "MIT" + }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "license": 
"MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/js-yaml": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/kuler": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz", + "integrity": "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A==", + "license": "MIT" + }, + "node_modules/logform": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/logform/-/logform-2.7.0.tgz", + "integrity": "sha512-TFYA4jnP7PVbmlBIfhlSe+WKxs9dklXMTEGcBCIvLhE/Tn3H6Gk1norupVW7m5Cnd4bLcr08AytbyV/xj7f/kQ==", + "license": "MIT", + "dependencies": { + "@colors/colors": "1.6.0", + "@types/triple-beam": "^1.3.2", + "fecha": "^4.2.0", + "ms": "^2.1.1", + "safe-stable-stringify": "^2.3.1", + "triple-beam": "^1.3.0" + }, + "engines": { + "node": ">= 12.0.0" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": 
"sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "deprecated": "Use your platform's native DOMException instead", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "github", + "url": "https://paypal.me/jimmywarting" + } + ], + "license": "MIT", + "engines": { + "node": ">=10.5.0" + } + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/one-time": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/one-time/-/one-time-1.0.0.tgz", + "integrity": "sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g==", + "license": "MIT", + "dependencies": { + "fn.name": "1.x.x" + } + }, + "node_modules/openai": { + "version": "4.104.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.104.0.tgz", + "integrity": 
"sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==", + "license": "Apache-2.0", + "dependencies": { + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7" + }, + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.23.8" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, + "node_modules/openai/node_modules/@types/node": { + "version": "18.19.118", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.118.tgz", + "integrity": "sha512-hIPK0hSrrcaoAu/gJMzN3QClXE4QdCdFvaenJ0JsjIbExP1JFFVH+RHcBt25c9n8bx5dkIfqKE+uw6BmBns7ug==", + "license": "MIT", + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/openai/node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "license": "MIT" + }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": 
"patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/safe-stable-stringify": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.5.0.tgz", + "integrity": "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, + "node_modules/simple-swizzle": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", + "integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==", + "license": "MIT", + "dependencies": { + "is-arrayish": "^0.3.1" + } + }, + "node_modules/stack-trace": { + "version": "0.0.10", + "resolved": "https://registry.npmjs.org/stack-trace/-/stack-trace-0.0.10.tgz", + "integrity": "sha512-KGzahc7puUKkzyMt+IqAep+TVNbKP+k2Lmwhub39m1AsTSkaDutx56aDCo+HLDzf/D26BIHTJWNiTG1KAJiQCg==", + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/text-hex": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/text-hex/-/text-hex-1.0.0.tgz", + "integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg==", + "license": "MIT" + }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, + 
"node_modules/triple-beam": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.4.1.tgz", + "integrity": "sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==", + "license": "MIT", + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/undici-types": { + "version": "7.8.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.8.0.tgz", + "integrity": "sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw==", + "license": "MIT" + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": 
"https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/winston": { + "version": "3.17.0", + "resolved": "https://registry.npmjs.org/winston/-/winston-3.17.0.tgz", + "integrity": "sha512-DLiFIXYC5fMPxaRg832S6F5mJYvePtmO5G9v9IgUFPhXm9/GkXarH/TUrBAVzhTCzAj9anE/+GjrgXp/54nOgw==", + "license": "MIT", + "dependencies": { + "@colors/colors": "^1.6.0", + "@dabh/diagnostics": "^2.0.2", + "async": "^3.2.3", + "is-stream": "^2.0.0", + "logform": "^2.7.0", + "one-time": "^1.0.0", + "readable-stream": "^3.4.0", + "safe-stable-stringify": "^2.3.1", + "stack-trace": "0.0.x", + "triple-beam": "^1.3.0", + "winston-transport": "^4.9.0" + }, + "engines": { + "node": ">= 12.0.0" + } + }, + "node_modules/winston-transport": { + "version": "4.9.0", + "resolved": "https://registry.npmjs.org/winston-transport/-/winston-transport-4.9.0.tgz", + "integrity": "sha512-8drMJ4rkgaPo1Me4zD/3WLfI/zPdA9o2IipKODunnGDcuqbHwjsbB79ylv04LCGGzU0xQ6vTznOMpQGaLhhm6A==", + "license": "MIT", + "dependencies": { + "logform": "^2.7.0", + "readable-stream": "^3.6.2", + "triple-beam": "^1.3.0" + }, + "engines": { + "node": ">= 12.0.0" + } + }, + "node_modules/ws": { + "version": "8.18.3", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", + "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + } + } +} diff --git a/eval-server/nodejs/package.json b/eval-server/nodejs/package.json new file mode 100644 index 0000000..6f92a73 --- /dev/null +++ 
b/eval-server/nodejs/package.json @@ -0,0 +1,43 @@ +{ + "name": "bo-eval-server", + "version": "1.0.0", + "description": "WebSocket server for evaluating LLM agents with LLM-as-a-judge", + "main": "src/lib/EvalServer.js", + "type": "module", + "exports": { + ".": "./src/lib/EvalServer.js", + "./EvalServer": "./src/lib/EvalServer.js", + "./EvaluationLoader": "./src/lib/EvaluationLoader.js", + "./HTTPWrapper": "./src/lib/HTTPWrapper.js", + "./judges/Judge": "./src/lib/judges/Judge.js", + "./judges/LLMJudge": "./src/lib/judges/LLMJudge.js", + "./CLI": "./src/cli/CLI.js" + }, + "bin": { + "eval-server": "./src/cli/index.js" + }, + "scripts": { + "start": "node examples/with-http-wrapper.js", + "dev": "node --watch examples/with-http-wrapper.js", + "cli": "node src/cli/index.js", + "lib:example": "node examples/library-usage.js", + "lib:example:http": "node examples/with-http-wrapper.js" + }, + "keywords": ["websocket", "llm", "evaluation", "rpc", "library", "programmatic"], + "author": "", + "license": "MIT", + "dependencies": { + "ws": "^8.16.0", + "uuid": "^9.0.1", + "winston": "^3.11.0", + "dotenv": "^16.3.1", + "openai": "^4.24.1", + "js-yaml": "^4.1.0" + }, + "devDependencies": { + "@types/ws": "^8.5.10" + }, + "engines": { + "node": ">=18.0.0" + } +} \ No newline at end of file diff --git a/eval-server/nodejs/schemas/client.schema.json b/eval-server/nodejs/schemas/client.schema.json new file mode 100644 index 0000000..8dfdd3b --- /dev/null +++ b/eval-server/nodejs/schemas/client.schema.json @@ -0,0 +1,299 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "title": "Client Configuration Schema", + "description": "Schema for validating client YAML configuration files", + "required": ["client", "settings", "evaluations"], + "properties": { + "client": { + "type": "object", + "required": ["id", "name"], + "properties": { + "id": { + "type": "string", + "pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", 
+ "description": "UUID v4 format client identifier" + }, + "name": { + "type": "string", + "minLength": 1, + "maxLength": 100, + "description": "Human-readable client name" + }, + "secret_key": { + "type": "string", + "description": "Optional authentication secret key" + }, + "description": { + "type": "string", + "description": "Optional client description" + } + } + }, + "settings": { + "type": "object", + "properties": { + "max_concurrent_evaluations": { + "type": "integer", + "minimum": 1, + "maximum": 10, + "default": 3 + }, + "default_timeout": { + "type": "integer", + "minimum": 5000, + "maximum": 300000, + "default": 30000, + "description": "Default timeout in milliseconds" + }, + "retry_policy": { + "type": "object", + "properties": { + "max_retries": { + "type": "integer", + "minimum": 0, + "maximum": 5, + "default": 2 + }, + "backoff_multiplier": { + "type": "number", + "minimum": 1, + "maximum": 5, + "default": 2 + }, + "initial_delay": { + "type": "integer", + "minimum": 100, + "maximum": 10000, + "default": 1000, + "description": "Initial delay in milliseconds" + } + } + } + } + }, + "evaluations": { + "type": "array", + "items": { + "$ref": "#/definitions/evaluation" + } + } + }, + "definitions": { + "evaluation": { + "type": "object", + "required": ["id", "name", "tool", "input"], + "properties": { + "id": { + "type": "string", + "pattern": "^[a-zA-Z0-9-_]+$", + "minLength": 1, + "maxLength": 100, + "description": "Unique evaluation identifier" + }, + "name": { + "type": "string", + "minLength": 1, + "maxLength": 200, + "description": "Human-readable evaluation name" + }, + "description": { + "type": "string", + "description": "Optional evaluation description" + }, + "enabled": { + "type": "boolean", + "default": true, + "description": "Whether this evaluation is enabled" + }, + "target": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "Target URL for the evaluation" + }, + "wait_for": { + 
"type": "string", + "enum": ["load", "domcontentloaded", "networkidle"], + "default": "networkidle" + }, + "wait_timeout": { + "type": "integer", + "minimum": 1000, + "maximum": 30000, + "default": 5000 + } + } + }, + "tool": { + "type": "string", + "enum": [ + "extract_data", + "extract_schema_streamlined", + "research_agent", + "action_agent", + "web_task_agent" + ], + "description": "Tool to execute for this evaluation" + }, + "timeout": { + "type": "integer", + "minimum": 5000, + "maximum": 300000, + "description": "Evaluation timeout in milliseconds" + }, + "input": { + "type": "object", + "description": "Tool-specific input parameters" + }, + "validation": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["llm-judge", "snapshot", "hybrid"] + }, + "llm_judge": { + "$ref": "#/definitions/llm_judge_config" + }, + "snapshot": { + "$ref": "#/definitions/snapshot_config" + }, + "hybrid": { + "type": "object", + "properties": { + "weight_llm": { + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "weight_snapshot": { + "type": "number", + "minimum": 0, + "maximum": 1 + } + } + } + } + }, + "metadata": { + "type": "object", + "properties": { + "tags": { + "type": "array", + "items": { + "type": "string" + } + }, + "priority": { + "type": "string", + "enum": ["low", "normal", "high"], + "default": "normal" + }, + "owner": { + "type": "string", + "description": "Responsible team or person" + }, + "created": { + "type": "string", + "format": "date" + }, + "modified": { + "type": "string", + "format": "date" + } + } + } + } + }, + "llm_judge_config": { + "type": "object", + "required": ["criteria"], + "properties": { + "model": { + "type": "string", + "default": "gpt-4o-mini", + "description": "LLM model to use for evaluation" + }, + "temperature": { + "type": "number", + "minimum": 0, + "maximum": 2, + "default": 0.3 + }, + "criteria": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 
1, + "description": "List of evaluation criteria" + }, + "visual_verification": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "default": false + }, + "capture_before": { + "type": "boolean", + "default": true + }, + "capture_after": { + "type": "boolean", + "default": true + }, + "prompts": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } + }, + "snapshot_config": { + "type": "object", + "properties": { + "structure_only": { + "type": "boolean", + "default": false, + "description": "Compare only structure, not values" + }, + "exclude_paths": { + "type": "array", + "items": { + "type": "string" + }, + "description": "JSONPath expressions for fields to exclude" + }, + "sanitizers": { + "type": "array", + "items": { + "type": "object", + "required": ["path"], + "properties": { + "path": { + "type": "string", + "description": "JSONPath to the field" + }, + "pattern": { + "type": "string", + "description": "Regex pattern to match" + }, + "replacement": { + "type": "string", + "description": "Replacement string" + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/eval-server/nodejs/src/api-server.js b/eval-server/nodejs/src/api-server.js new file mode 100644 index 0000000..2713da4 --- /dev/null +++ b/eval-server/nodejs/src/api-server.js @@ -0,0 +1,492 @@ +// Copyright 2025 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +import http from 'http'; +import url from 'url'; +import fs from 'fs'; +import path from 'path'; +import yaml from 'js-yaml'; +import { v4 as uuidv4 } from 'uuid'; + +import logger from './logger.js'; +// No need to import EvaluationServer - it's passed as constructor parameter + +class APIServer { + constructor(evaluationServer, port = 8081) { + this.evaluationServer = evaluationServer; + this.port = port; + this.server = null; + this.configDefaults = null; + this.loadConfigDefaults(); + } + + /** + * Load default model configuration from config.yaml + */ + loadConfigDefaults() { + try { + const configPath = path.resolve('./evals/config.yaml'); + if (fs.existsSync(configPath)) { + const configContent = fs.readFileSync(configPath, 'utf8'); + this.configDefaults = yaml.load(configContent); + logger.info('Loaded config.yaml defaults:', this.configDefaults); + } else { + logger.warn('config.yaml not found, using hardcoded defaults'); + this.configDefaults = { + model: { + main_model: 'gpt-4.1', + mini_model: 'gpt-4.1-mini', + nano_model: 'gpt-4.1-nano', + provider: 'openai' + } + }; + } + } catch (error) { + logger.error('Failed to load config.yaml:', error); + this.configDefaults = { + model: { + main_model: 'gpt-4.1', + mini_model: 'gpt-4.1-mini', + nano_model: 'gpt-4.1-nano', + provider: 'openai' + } + }; + } + } + + start() { + this.server = http.createServer((req, res) => { + // Enable CORS + res.setHeader('Access-Control-Allow-Origin', '*'); + res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS'); + res.setHeader('Access-Control-Allow-Headers', 'Content-Type'); + + if (req.method === 'OPTIONS') { + res.writeHead(200); + res.end(); + return; + } + + this.handleRequest(req, res); + }); + + this.server.listen(this.port, () => { + logger.info(`API server started on http://localhost:${this.port}`); + }); + } + + async handleRequest(req, res) { + const parsedUrl = url.parse(req.url, true); + const pathname = parsedUrl.pathname; + const method = 
req.method; + + try { + // Get body for POST requests + let body = ''; + if (method === 'POST') { + for await (const chunk of req) { + body += chunk; + } + } + + let result; + + // Handle dynamic client evaluations route + if (pathname.startsWith('/clients/') && pathname.endsWith('/evaluations')) { + const clientId = pathname.split('/')[2]; + result = this.getClientEvaluations(clientId); + } else { + switch (pathname) { + case '/status': + result = this.getStatus(); + break; + + case '/clients': + result = this.getClients(); + break; + + case '/evaluate': + if (method !== 'POST') { + this.sendError(res, 405, 'Method not allowed'); + return; + } + result = await this.triggerEvaluation(JSON.parse(body)); + break; + + case '/v1/responses': + if (method !== 'POST') { + this.sendError(res, 405, 'Method not allowed'); + return; + } + result = await this.handleResponsesRequest(JSON.parse(body)); + break; + + default: + this.sendError(res, 404, 'Not found'); + return; + } + } + + this.sendResponse(res, 200, result); + + } catch (error) { + logger.error('API error:', error); + this.sendError(res, 500, error.message); + } + } + + getStatus() { + const status = this.evaluationServer.getStatus(); + const clients = this.evaluationServer.getClientManager().getAllClients(); + + return { + server: status, + clients: clients.map(client => ({ + id: client.id, + name: client.name, + connected: this.evaluationServer.connectedClients.has(client.id), + ready: this.evaluationServer.connectedClients.get(client.id)?.ready || false + })) + }; + } + + getClients() { + const clients = this.evaluationServer.getClientManager().getAllClients(); + + return clients.map(client => { + const evaluations = this.evaluationServer.getClientManager().getClientEvaluations(client.id); + const connection = this.evaluationServer.connectedClients.get(client.id); + + return { + id: client.id, + name: client.name, + description: client.description, + connected: !!connection, + ready: connection?.ready || false, 
+ evaluations: evaluations.map(evaluation => ({ + id: evaluation.id, + name: evaluation.name, + tool: evaluation.tool, + status: evaluation.status || 'pending', + enabled: evaluation.enabled !== false + })) + }; + }); + } + + getClientEvaluations(clientId) { + if (!clientId) { + throw new Error('Client ID is required'); + } + + const evaluations = this.evaluationServer.getClientManager().getClientEvaluations(clientId); + return { + clientId, + evaluations: evaluations.map(evaluation => ({ + id: evaluation.id, + name: evaluation.name, + description: evaluation.description, + tool: evaluation.tool, + status: evaluation.status || 'pending', + enabled: evaluation.enabled !== false, + lastRun: evaluation.lastRun, + lastResult: evaluation.lastResult + })) + }; + } + + async triggerEvaluation(payload) { + const { clientId, evaluationId, runAll = false } = payload; + + if (!clientId) { + throw new Error('Client ID is required'); + } + + // Check if client is connected + const connection = this.evaluationServer.connectedClients.get(clientId); + if (!connection || !connection.ready) { + throw new Error(`Client '${clientId}' is not connected or not ready`); + } + + if (runAll) { + // Run all evaluations for the client + const evaluations = this.evaluationServer.getClientManager().getClientEvaluations(clientId); + const results = []; + + for (const evaluation of evaluations) { + try { + this.evaluationServer.getClientManager().updateEvaluationStatus(clientId, evaluation.id, 'pending'); + await this.evaluationServer.executeEvaluation(connection, evaluation); + results.push({ id: evaluation.id, status: 'completed' }); + } catch (error) { + results.push({ id: evaluation.id, status: 'failed', error: error.message }); + } + } + + return { + clientId, + type: 'batch', + results + }; + } + // Run specific evaluation + if (!evaluationId) { + throw new Error('Evaluation ID is required when runAll is false'); + } + + const evaluation = 
this.evaluationServer.getClientManager().getClientEvaluations(clientId) + .find(e => e.id === evaluationId); + + if (!evaluation) { + throw new Error(`Evaluation '${evaluationId}' not found for client '${clientId}'`); + } + + this.evaluationServer.getClientManager().updateEvaluationStatus(clientId, evaluationId, 'pending'); + await this.evaluationServer.executeEvaluation(connection, evaluation); + + return { + clientId, + evaluationId, + type: 'single', + status: 'completed' + }; + + } + + /** + * Handle OpenAI Responses API compatible requests with nested model format + */ + async handleResponsesRequest(requestBody) { + try { + // Validate required input field + if (!requestBody.input || typeof requestBody.input !== 'string') { + throw new Error('Missing or invalid "input" field. Expected a string.'); + } + + // Handle nested model configuration directly + const nestedModelConfig = this.processNestedModelConfig(requestBody); + + const redact = (mk) => ({ + ...mk, + api_key: mk?.api_key ? `${String(mk.api_key).slice(0, 4)}...` : undefined + }); + logger.info('Processing responses request:', { + input: requestBody.input, + modelConfig: { + main_model: redact(nestedModelConfig.main_model), + mini_model: redact(nestedModelConfig.mini_model), + nano_model: redact(nestedModelConfig.nano_model), + } + }); + + // Find a connected and ready client + const readyClient = this.findReadyClient(); + if (!readyClient) { + throw new Error('No DevTools client is connected and ready. 
Please ensure a DevTools client is connected to the evaluation server.'); + } + + // Create a dynamic evaluation for this request + const evaluation = this.createDynamicEvaluationNested(requestBody.input, nestedModelConfig); + + // Execute the evaluation on the DevTools client + logger.info('Executing evaluation on DevTools client', { + clientId: readyClient.clientId, + evaluationId: evaluation.id + }); + + const result = await this.evaluationServer.executeEvaluation(readyClient, evaluation); + + // Debug: log the result structure + logger.debug('executeEvaluation result:', result); + + // Extract the response text from the result + const responseText = this.extractResponseText(result); + + // Format in OpenAI Responses API format + return this.formatOpenAIResponse(responseText); + + } catch (error) { + logger.error('Error handling responses request:', error); + throw error; + } + } + + /** + * Process nested model configuration from request body + * @param {Object} requestBody - Request body containing optional model configuration + * @returns {import('./types/model-config').ModelConfig} Nested model configuration + */ + processNestedModelConfig(requestBody) { + const defaults = this.configDefaults?.model || {}; + + // If nested format is provided, use it directly with fallbacks + if (requestBody.model) { + return { + main_model: requestBody.model.main_model || this.createDefaultModelConfig('main', defaults), + mini_model: requestBody.model.mini_model || this.createDefaultModelConfig('mini', defaults), + nano_model: requestBody.model.nano_model || this.createDefaultModelConfig('nano', defaults) + }; + } + + // No model config provided, use defaults + return { + main_model: this.createDefaultModelConfig('main', defaults), + mini_model: this.createDefaultModelConfig('mini', defaults), + nano_model: this.createDefaultModelConfig('nano', defaults) + }; + } + + /** + * Create default model configuration for a tier + * @param {'main' | 'mini' | 'nano'} tier - Model tier 
+ * @param {Object} defaults - Default configuration from config.yaml + * @returns {import('./types/model-config').ModelTierConfig} Model tier configuration + */ + createDefaultModelConfig(tier, defaults) { + const defaultModels = { + main: defaults.main_model || 'gpt-4', + mini: defaults.mini_model || 'gpt-4-mini', + nano: defaults.nano_model || 'gpt-3.5-turbo' + }; + + return { + provider: defaults.provider || 'openai', + model: defaultModels[tier], + api_key: process.env.OPENAI_API_KEY + }; + } + + + /** + * Find a connected and ready client + */ + findReadyClient() { + for (const [clientId, connection] of this.evaluationServer.connectedClients) { + if (connection.ready) { + return connection; + } + } + return null; + } + + /** + * Create a dynamic evaluation object with nested model configuration + * @param {string} input - Input message for the evaluation + * @param {import('./types/model-config').ModelConfig} nestedModelConfig - Model configuration + * @returns {import('./types/model-config').EvaluationRequest} Evaluation request object + */ + createDynamicEvaluationNested(input, nestedModelConfig) { + const evaluationId = `api-eval-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`; + + return { + id: evaluationId, + name: 'API Request', + description: 'Dynamic evaluation created from API request', + enabled: true, + tool: 'chat', + timeout: 1500000, // 25 minutes + input: { + message: input + }, + model: nestedModelConfig, + validation: { + type: 'none' // No validation needed for API responses + }, + metadata: { + tags: ['api', 'dynamic'], + priority: 'high', + source: 'api' + } + }; + } + + + /** + * Extract response text from evaluation result + */ + extractResponseText(result) { + if (!result) { + return 'No response received from evaluation'; + } + + // Handle different result formats + if (typeof result === 'string') { + return result; + } + + // Check for nested evaluation result structure + if (result.output && result.output.response) { + 
return result.output.response; + } + + if (result.output && result.output.text) { + return result.output.text; + } + + if (result.output && result.output.answer) { + return result.output.answer; + } + + // Check top-level properties + if (result.response) { + return result.response; + } + + if (result.text) { + return result.text; + } + + if (result.answer) { + return result.answer; + } + + // If result is an object, try to extract meaningful content + if (typeof result === 'object') { + return JSON.stringify(result, null, 2); + } + + return 'Unable to extract response text from evaluation result'; + } + + /** + * Format response in OpenAI Responses API format + */ + formatOpenAIResponse(responseText) { + const messageId = `msg_${uuidv4().replace(/-/g, '')}`; + + return [ + { + id: messageId, + type: 'message', + role: 'assistant', + content: [ + { + type: 'output_text', + text: responseText, + annotations: [] + } + ] + } + ]; + } + + sendResponse(res, statusCode, data) { + res.writeHead(statusCode, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify(data, null, 2)); + } + + sendError(res, statusCode, message) { + res.writeHead(statusCode, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: message })); + } + + stop() { + if (this.server) { + this.server.close(); + logger.info('API server stopped'); + } + } +} + +export { APIServer }; diff --git a/eval-server/nodejs/src/cli/CLI.js b/eval-server/nodejs/src/cli/CLI.js new file mode 100644 index 0000000..240e66b --- /dev/null +++ b/eval-server/nodejs/src/cli/CLI.js @@ -0,0 +1,518 @@ +#!/usr/bin/env node + +// Copyright 2025 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +import readline from 'readline'; +import { EvalServer } from '../lib/EvalServer.js'; + +/** + * EvaluationCLI - Command line interface for the evaluation server + * + * Refactored to use the new EvalServer library instead of directly + * instantiating the old EvaluationServer class. + */ +export class EvaluationCLI { + constructor(options = {}) { + this.server = new EvalServer(options); + this.rl = readline.createInterface({ + input: process.stdin, + output: process.stdout + }); + + // Keep track of connected clients for CLI operations + this.connectedClients = new Map(); + + // Set up event handlers + this.setupEventHandlers(); + } + + /** + * Set up event handlers for the server + */ + setupEventHandlers() { + this.server.onConnect(client => { + this.connectedClients.set(client.id, client); + console.log(`โœ… Client connected: ${client.id}`); + }); + + this.server.onDisconnect(clientInfo => { + this.connectedClients.delete(clientInfo.clientId); + console.log(`โŒ Client disconnected: ${clientInfo.clientId}`); + }); + + this.server.on('error', error => { + console.error(`๐Ÿšจ Server error: ${error.message}`); + }); + } + + async start() { + console.log('๐Ÿš€ Starting Evaluation Server CLI'); + console.log('===================================='); + + // Start the server + try { + await this.server.start(); + } catch (error) { + console.error(`โŒ Failed to start server: ${error.message}`); + process.exit(1); + } + + // Wait a moment for server to start + await new Promise(resolve => setTimeout(resolve, 1000)); + + this.showHelp(); + this.startInteractiveMode(); + } + + showHelp() { + console.log('\\nAvailable commands:'); + console.log(' status - Show server status'); + console.log(' clients - List all clients and their evaluations'); + console.log(' clients-connected - List connected clients'); + console.log(' list-tabs [client-id] - List active tabs (all clients or specific client)'); + console.log(' run - Run specific evaluation for a client'); + 
console.log(' run-all - Run all evaluations for a client'); + console.log(' run-tab - Run evaluation on specific tab'); + console.log(' eval - Run specific evaluation on all connected clients'); + console.log(' eval all - Run all pending evaluations on all clients'); + console.log(' load-evals [directory] - Load evaluations from directory'); + console.log(' list-evals [category] - List available evaluations'); + console.log(' help - Show this help'); + console.log(' quit - Exit the CLI'); + console.log(''); + } + + startInteractiveMode() { + this.rl.question('eval-server> ', (input) => { + this.handleCommand(input.trim()); + }); + } + + async handleCommand(input) { + const [command, ...args] = input.split(' '); + + try { + switch (command) { + case 'status': + this.showStatus(); + break; + case 'clients': + this.listClients(); + break; + case 'run': + if (args.length < 2) { + console.log('Usage: run '); + } else { + await this.runSpecificEvaluation(args[0], args[1]); + } + break; + case 'run-all': + if (args.length < 1) { + console.log('Usage: run-all '); + } else { + await this.runAllEvaluations(args[0]); + } + break; + case 'eval': + if (args.length === 0) { + console.log('Usage: eval OR eval all'); + } else { + await this.runEvaluation(args.join(' ')); + } + break; + case 'clients-connected': + this.listConnectedClients(); + break; + case 'list-tabs': + this.listTabs(args[0]); + break; + case 'run-tab': + if (args.length < 3) { + console.log('Usage: run-tab '); + } else { + await this.runTabEvaluation(args[0], args[1], args[2]); + } + break; + case 'load-evals': + await this.loadEvaluations(args[0]); + break; + case 'list-evals': + this.listEvaluations(args[0]); + break; + case 'help': + this.showHelp(); + break; + case 'quit': + case 'exit': + this.quit(); + return; + case '': + break; + default: + console.log(`Unknown command: ${command}. 
Type 'help' for available commands.`); + } + } catch (error) { + console.error('Error:', error.message); + } + + this.startInteractiveMode(); + } + + showStatus() { + const status = this.server.getStatus(); + console.log('\\n๐Ÿ“Š Server Status:'); + console.log(` Running: ${status.isRunning ? 'Yes' : 'No'}`); + console.log(` Host: ${status.host}:${status.port}`); + console.log(` Connected clients: ${status.connectedClients}`); + console.log(` Unique base clients: ${status.uniqueBaseClients}`); + console.log(` Total tabs: ${status.totalTabs}`); + console.log(` Ready clients: ${status.readyClients}`); + console.log(''); + } + + listConnectedClients() { + console.log('\\n๐Ÿ‘ฅ Connected Clients:'); + + if (this.connectedClients.size === 0) { + console.log(' No clients connected'); + } else { + for (const [clientId, client] of this.connectedClients) { + const info = client.getInfo(); + console.log(` Client ID: ${info.id}`); + console.log(` Base Client: ${info.baseClientId}`); + console.log(` Tab ID: ${info.tabId || 'default'}`); + console.log(` Connected: ${info.connectedAt}`); + console.log(` Address: ${info.remoteAddress}`); + console.log(''); + } + } + } + + listClients() { + const clients = this.server.clientManager.getAllClients(); + console.log('\\n๐Ÿ‘ฅ Registered Clients:'); + + if (clients.length === 0) { + console.log(' No clients registered'); + return; + } + + clients.forEach(client => { + console.log(`\\n ๐Ÿ“‹ ${client.name} (${client.id})`); + console.log(` Description: ${client.description || 'N/A'}`); + console.log(` Secret Key: ${client.secretKey ? 
'***' : 'None'}`); + + const evaluations = this.server.clientManager.getClientEvaluations(client.id); + console.log(` Evaluations: ${evaluations.length}`); + + // Group evaluations by category + const evaluationsByCategory = {}; + evaluations.forEach(evaluation => { + const category = evaluation.category || 'uncategorized'; + if (!evaluationsByCategory[category]) { + evaluationsByCategory[category] = []; + } + evaluationsByCategory[category].push(evaluation); + }); + + // Display evaluations grouped by category + Object.keys(evaluationsByCategory).sort().forEach(category => { + const categoryEvals = evaluationsByCategory[category]; + console.log(`\\n ๐Ÿ“ ${category} (${categoryEvals.length})`); + categoryEvals.forEach(evaluation => { + const status = evaluation.status || 'pending'; + const statusIcon = status === 'completed' ? 'โœ…' : status === 'running' ? '๐Ÿ”„' : status === 'failed' ? 'โŒ' : 'โณ'; + console.log(` ${statusIcon} ${evaluation.id}: ${evaluation.name}`); + }); + }); + }); + console.log(''); + } + + async loadEvaluations(directory) { + try { + const evalsDir = directory || './evals'; + console.log(`\\n๐Ÿ“‚ Loading evaluations from ${evalsDir}...`); + + const result = await this.server.loadEvaluations(evalsDir); + console.log(`โœ… Loaded ${result.totalEvaluations} evaluations from ${result.categories} categories`); + + } catch (error) { + console.log(`โŒ Failed to load evaluations: ${error.message}`); + } + } + + listEvaluations(category) { + const evaluations = category + ? this.server.evaluationLoader.getEvaluationsByCategory(category) + : this.server.evaluationLoader.getAllEvaluations(); + + console.log(`\\n๐Ÿ“‹ ${category ? 
`Evaluations in category '${category}'` : 'All Evaluations'}:`); + + if (evaluations.length === 0) { + console.log(' No evaluations found'); + return; + } + + // Group by category if showing all + if (!category) { + const byCategory = {}; + evaluations.forEach(evaluation => { + const cat = evaluation.category || 'uncategorized'; + if (!byCategory[cat]) byCategory[cat] = []; + byCategory[cat].push(evaluation); + }); + + Object.keys(byCategory).sort().forEach(cat => { + console.log(`\\n ๐Ÿ“ ${cat}:`); + byCategory[cat].forEach(evaluation => { + const enabledIcon = evaluation.enabled !== false ? 'โœ…' : 'โŒ'; + console.log(` ${enabledIcon} ${evaluation.id}: ${evaluation.name} (${evaluation.tool})`); + }); + }); + } else { + evaluations.forEach(evaluation => { + const enabledIcon = evaluation.enabled !== false ? 'โœ…' : 'โŒ'; + console.log(` ${enabledIcon} ${evaluation.id}: ${evaluation.name} (${evaluation.tool})`); + if (evaluation.description) { + console.log(` ${evaluation.description}`); + } + }); + } + console.log(''); + } + + async runSpecificEvaluation(clientId, evaluationId) { + console.log(`\\n๐ŸŽฏ Running evaluation '${evaluationId}' for client '${clientId}'...`); + + try { + const client = this.connectedClients.get(clientId); + if (!client) { + console.log(`โŒ Client '${clientId}' is not connected`); + return; + } + + // Get the evaluation + const evaluation = this.server.evaluationLoader.getEvaluationById(evaluationId); + if (!evaluation) { + console.log(`โŒ Evaluation '${evaluationId}' not found`); + return; + } + + // Execute the evaluation + const result = await client.evaluate(evaluation); + + console.log(`โœ… Evaluation '${evaluationId}' completed successfully`); + console.log(`Result: ${JSON.stringify(result, null, 2)}`); + + } catch (error) { + console.log(`โŒ Evaluation failed: ${error.message}`); + } + } + + async runAllEvaluations(clientId) { + console.log(`\\n๐Ÿš€ Running all evaluations for client '${clientId}'...`); + + try { + const 
client = this.connectedClients.get(clientId); + if (!client) { + console.log(`โŒ Client '${clientId}' is not connected`); + return; + } + + // Get all evaluations + const evaluations = this.server.evaluationLoader.getAllEvaluations(); + + if (evaluations.length === 0) { + console.log(`โŒ No evaluations found`); + return; + } + + console.log(`Found ${evaluations.length} evaluations to run...`); + + let completed = 0; + let failed = 0; + + for (const evaluation of evaluations) { + if (evaluation.enabled === false) { + console.log(`โญ๏ธ Skipping disabled: ${evaluation.name}`); + continue; + } + + console.log(`\\n๐Ÿ”„ Running: ${evaluation.name} (${evaluation.id})`); + + try { + await client.evaluate(evaluation); + console.log(` โœ… Completed: ${evaluation.name}`); + completed++; + } catch (error) { + console.log(` โŒ Failed: ${evaluation.name} - ${error.message}`); + failed++; + } + + // Add a small delay between evaluations + await new Promise(resolve => setTimeout(resolve, 2000)); + } + + console.log(`\\n๐Ÿ“Š Results: ${completed} completed, ${failed} failed`); + + } catch (error) { + console.log(`โŒ Batch evaluation failed: ${error.message}`); + } + } + + async runEvaluation(task) { + console.log(`\\n๐Ÿ” Running evaluation: "${task}"`); + console.log('====================================='); + + try { + if (this.connectedClients.size === 0) { + console.log('โŒ No clients connected'); + return; + } + + const clients = Array.from(this.connectedClients.values()); + console.log(`Running on ${clients.length} connected clients...`); + + const results = []; + + for (const client of clients) { + try { + let evaluation; + + if (task === 'all') { + // Run all evaluations for this client + const allEvals = this.server.evaluationLoader.getAllEvaluations() + .filter(e => e.enabled !== false); + + for (const evaluation of allEvals) { + const result = await client.evaluate(evaluation); + results.push({ + clientId: client.id, + evaluationId: evaluation.id, + success: true, 
+ result + }); + } + } else { + // Run specific evaluation + evaluation = this.server.evaluationLoader.getEvaluationById(task); + if (!evaluation) { + results.push({ + clientId: client.id, + evaluationId: task, + success: false, + error: `Evaluation '${task}' not found` + }); + continue; + } + + const result = await client.evaluate(evaluation); + results.push({ + clientId: client.id, + evaluationId: evaluation.id, + success: true, + result + }); + } + } catch (error) { + results.push({ + clientId: client.id, + success: false, + error: error.message + }); + } + } + + // Display results + console.log('\\n๐Ÿ“‹ Evaluation Results:'); + results.forEach((result, index) => { + console.log(`\\n Client ${index + 1} (${result.clientId}):`); + + if (result.success) { + console.log(` โœ… Success`); + if (result.evaluationId) { + console.log(` Evaluation ID: ${result.evaluationId}`); + } + } else { + console.log(` โŒ Error: ${result.error}`); + } + }); + + console.log('\\nโœ… Evaluation completed'); + + } catch (error) { + console.log(`\\nโŒ Evaluation failed: ${error.message}`); + } + } + + listTabs(clientId = null) { + console.log('\\n๐Ÿ“ฑ Active Tabs:'); + + if (clientId) { + // List tabs for specific client + const client = this.connectedClients.get(clientId); + if (!client) { + console.log(` Client '${clientId}' not found`); + return; + } + + const info = client.getInfo(); + console.log(`\\n Client: ${info.baseClientId}`); + console.log(` ๐Ÿ“„ Tab ID: ${info.tabId || 'default'}`); + console.log(` Connected: ${info.connectedAt}`); + console.log(` Address: ${info.remoteAddress || 'unknown'}`); + } else { + // List tabs for all clients + if (this.connectedClients.size === 0) { + console.log(' No active tabs'); + return; + } + + for (const [clientId, client] of this.connectedClients) { + const info = client.getInfo(); + console.log(`\\n ๐Ÿ“‹ Client: ${info.baseClientId}`); + console.log(` ๐Ÿ“„ Tab ID: ${info.tabId || 'default'}`); + console.log(` Composite ID: ${info.id}`); 
+ console.log(` Connected: ${info.connectedAt}`); + console.log(` Address: ${info.remoteAddress || 'unknown'}`); + } + } + console.log(''); + } + + async runTabEvaluation(clientId, tabId, evaluationId) { + const compositeClientId = `${clientId}:${tabId}`; + console.log(`\\n๐ŸŽฏ Running evaluation '${evaluationId}' on tab '${tabId}' of client '${clientId}'...`); + + try { + const client = this.connectedClients.get(compositeClientId); + if (!client) { + console.log(`โŒ Tab '${tabId}' of client '${clientId}' is not connected`); + return; + } + + const evaluation = this.server.evaluationLoader.getEvaluationById(evaluationId); + if (!evaluation) { + console.log(`โŒ Evaluation '${evaluationId}' not found`); + return; + } + + const result = await client.evaluate(evaluation); + console.log(`โœ… Evaluation '${evaluationId}' completed successfully on tab '${tabId}'`); + console.log(`Result: ${JSON.stringify(result, null, 2)}`); + + } catch (error) { + console.log(`โŒ Tab evaluation failed: ${error.message}`); + } + } + + quit() { + console.log('\\n๐Ÿ‘‹ Shutting down...'); + this.server.stop(); + this.rl.close(); + process.exit(0); + } +} \ No newline at end of file diff --git a/eval-server/nodejs/src/cli/index.js b/eval-server/nodejs/src/cli/index.js new file mode 100644 index 0000000..f9d5c41 --- /dev/null +++ b/eval-server/nodejs/src/cli/index.js @@ -0,0 +1,23 @@ +#!/usr/bin/env node + +// Copyright 2025 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +import { EvaluationCLI } from './CLI.js'; + +// Start CLI if this file is run directly +if (import.meta.url === `file://${process.argv[1]}`) { + const cli = new EvaluationCLI(); + + process.on('SIGINT', () => { + cli.quit(); + }); + + cli.start().catch(error => { + console.error('Failed to start CLI:', error.message); + process.exit(1); + }); +} + +export { EvaluationCLI }; \ No newline at end of file diff --git a/eval-server/nodejs/src/client-manager.js b/eval-server/nodejs/src/client-manager.js new file mode 100644 index 0000000..d21b88d --- /dev/null +++ b/eval-server/nodejs/src/client-manager.js @@ -0,0 +1,576 @@ +import fs from 'fs'; +import path from 'path'; +import yaml from 'js-yaml'; +import { v4 as uuidv4 } from 'uuid'; +import logger from './logger.js'; + +class ClientManager { + constructor(clientsDir = './clients', evalsDir = './evals') { + this.clientsDir = path.resolve(clientsDir); + this.evalsDir = path.resolve(evalsDir); + this.clients = new Map(); + this.evaluations = new Map(); // clientId -> evaluations array + this.configDefaults = null; // Config.yaml defaults for model precedence + this.activeTabs = new Map(); // clientId -> Set of { tabId, connection, metadata } + + // Ensure directories exist + if (!fs.existsSync(this.clientsDir)) { + fs.mkdirSync(this.clientsDir, { recursive: true }); + } + if (!fs.existsSync(this.evalsDir)) { + fs.mkdirSync(this.evalsDir, { recursive: true }); + } + + this.loadConfigDefaults(); + this.loadAllClients(); + this.loadAllEvaluations(); + } + + /** + * Load default model configuration from config.yaml + */ + loadConfigDefaults() { + try { + const configPath = path.resolve(this.evalsDir, 'config.yaml'); + if (fs.existsSync(configPath)) { + const configContent = fs.readFileSync(configPath, 'utf8'); + this.configDefaults = yaml.load(configContent); + logger.info('Loaded config.yaml defaults:', this.configDefaults); + } else { + // Don't warn about missing config.yaml - it's optional + this.configDefaults = 
null; + } + } catch (error) { + logger.error('Failed to load config.yaml:', error); + this.configDefaults = null; + } + } + + /** + * Apply model precedence: API calls OR test YAML models override config.yaml fallback + * Precedence logic: + * 1. API calls OR individual test YAML models (highest priority - either overrides everything) + * 2. config.yaml defaults (fallback only when neither API nor test YAML specify models) + * @param {Object} evaluation - Evaluation object with optional model configuration + * @param {import('../types/model-config').ModelConfig} apiModelOverride - Optional API model override + * @returns {import('../types/model-config').ModelConfig} Final model configuration + */ + applyModelPrecedence(evaluation, apiModelOverride = null) { + // Check if API override is provided + if (apiModelOverride) { + // API model override takes precedence over everything + // Ensure nested format is used + return apiModelOverride; + } + + // Check if evaluation has its own model config from YAML + const testModel = evaluation.model; + if (testModel && Object.keys(testModel).length > 0) { + // Test YAML model takes precedence + // Ensure nested format is returned + return testModel; + } + + // Neither API nor test YAML specified models, use config.yaml defaults only + return this.configDefaults?.model || {}; + } + + /** + * Load all client YAML files on startup + */ + loadAllClients() { + try { + const files = fs.readdirSync(this.clientsDir) + .filter(f => f.endsWith('.yaml') || f.endsWith('.yml')) + .filter(f => { + // Only load base client YAML files, not composite ones with tab IDs + const clientId = path.basename(f, path.extname(f)); + return !clientId.includes(':'); + }); + + for (const file of files) { + const clientId = path.basename(file, path.extname(file)); + try { + this.loadClient(clientId); + } catch (error) { + logger.error(`Failed to load client ${clientId}:`, error); + } + } + + logger.info(`Loaded ${this.clients.size} clients`); + } catch 
(error) { + logger.error('Failed to load clients:', error); + } + } + + /** + * Load a specific client's YAML configuration + */ + loadClient(clientId) { + const yamlPath = path.join(this.clientsDir, `${clientId}.yaml`); + + if (!fs.existsSync(yamlPath)) { + throw new Error(`Client YAML not found: ${yamlPath}`); + } + + const yamlContent = fs.readFileSync(yamlPath, 'utf8'); + const config = yaml.load(yamlContent); + + // Validate client configuration + if (!config.client || config.client.id !== clientId) { + throw new Error(`Invalid client configuration: ID mismatch`); + } + + // Store client info + this.clients.set(clientId, { + id: config.client.id, + name: config.client.name, + secretKey: config.client.secret_key, + description: config.client.description, + settings: config.settings || {}, + yamlPath + }); + + // Note: Evaluations are now loaded separately from the evals directory + // Initialize empty evaluations array for this client + if (!this.evaluations.has(clientId)) { + this.evaluations.set(clientId, []); + } + + logger.info(`Loaded client ${clientId}`); + return config; + } + + /** + * Load all evaluations from the evals directory structure + */ + loadAllEvaluations() { + try { + // Clear existing evaluations to prevent duplicates on reload + this.evaluations.clear(); + + // Find all category directories + const categories = fs.readdirSync(this.evalsDir) + .filter(dir => fs.statSync(path.join(this.evalsDir, dir)).isDirectory()); + + let totalEvaluations = 0; + + for (const category of categories) { + const categoryDir = path.join(this.evalsDir, category); + const evalFiles = fs.readdirSync(categoryDir) + .filter(f => f.endsWith('.yaml') || f.endsWith('.yml')); + + for (const file of evalFiles) { + try { + const evalPath = path.join(categoryDir, file); + const yamlContent = fs.readFileSync(evalPath, 'utf8'); + const evaluation = yaml.load(yamlContent); + + if (evaluation.enabled !== false) { + // Apply model precedence: config.yaml overrides individual 
test models + const resolvedModel = this.applyModelPrecedence(evaluation); + + // Add evaluation to all clients for now + // In the future, you might want to have client-specific evaluation assignments + for (const [clientId] of this.clients) { + const clientEvals = this.evaluations.get(clientId) || []; + clientEvals.push({ + ...evaluation, + model: resolvedModel, // Use resolved model with precedence applied + clientId, + status: 'pending', + category, + filePath: evalPath + }); + this.evaluations.set(clientId, clientEvals); + } + totalEvaluations++; + } + } catch (error) { + logger.error(`Failed to load evaluation ${file}:`, error); + } + } + } + + // Update the client evaluation counts + for (const [clientId] of this.clients) { + const evalCount = this.evaluations.get(clientId)?.length || 0; + logger.info(`Loaded client ${clientId} with ${evalCount} evaluations`); + } + + logger.info(`Loaded ${totalEvaluations} evaluations from ${categories.length} categories`); + } catch (error) { + logger.error('Failed to load evaluations:', error); + } + } + + /** + * Register a new client with authentication + */ + registerClient(clientId, secretKey, capabilities, skipSecretValidation = false) { + const client = this.clients.get(clientId); + + if (!client) { + throw new Error(`Client ${clientId} not found. 
Please create a YAML configuration file.`); + } + + // Verify secret key if configured (unless we're skipping validation) + if (!skipSecretValidation && client.secretKey && client.secretKey !== secretKey) { + throw new Error('Invalid secret key'); + } + + // Update client capabilities + client.capabilities = capabilities; + client.lastRegistered = new Date().toISOString(); + + return { + success: true, + clientName: client.name, + evaluationsCount: this.evaluations.get(clientId)?.length || 0 + }; + } + + /** + * Get client information + */ + getClient(clientId) { + return this.clients.get(clientId); + } + + /** + * Get evaluations for a client + */ + getClientEvaluations(clientId) { + return this.evaluations.get(clientId) || []; + } + + /** + * Get next pending evaluation for a client + */ + getNextEvaluation(clientId) { + const evaluations = this.evaluations.get(clientId) || []; + return evaluations.find(e => e.status === 'pending'); + } + + /** + * Update evaluation status + */ + updateEvaluationStatus(clientId, evaluationId, status, result = null) { + const evaluations = this.evaluations.get(clientId); + if (!evaluations) return; + + const evaluation = evaluations.find(e => e.id === evaluationId); + if (evaluation) { + evaluation.status = status; + evaluation.lastRun = new Date().toISOString(); + if (result) { + evaluation.lastResult = result; + } + } + } + + /** + * Create a new client with default configuration + */ + async createClient(clientName, secretKey = null) { + const clientId = uuidv4(); + return this.createClientWithId(clientId, clientName, secretKey); + } + + /** + * Create a new client with a specific ID + */ + async createClientWithId(clientId, clientName, secretKey = null) { + const yamlPath = path.join(this.clientsDir, `${clientId}.yaml`); + + // Create simplified client configuration (evaluations come from evals directory) + const defaultConfig = { + client: { + id: clientId, + name: clientName, + secret_key: secretKey, + description: 
`Auto-generated DevTools evaluation client` + }, + settings: { + max_concurrent_evaluations: 3, + default_timeout: 45000, + retry_policy: { + max_retries: 2, + backoff_multiplier: 2, + initial_delay: 1000 + } + } + }; + + // Write YAML file + const yamlContent = yaml.dump(defaultConfig, { indent: 2 }); + fs.writeFileSync(yamlPath, yamlContent); + + // Load the new client + this.loadClient(clientId); + + // Load evaluations for the new client + this.loadAllEvaluations(); + + logger.info(`Created new client: ${clientId}`); + return { clientId, yamlPath }; + } + + /** + * Reload a specific client's configuration + */ + reloadClient(clientId) { + try { + this.loadClient(clientId); + logger.info(`Reloaded client: ${clientId}`); + return true; + } catch (error) { + logger.error(`Failed to reload client ${clientId}:`, error); + return false; + } + } + + /** + * Get all active clients + */ + getAllClients() { + return Array.from(this.clients.values()); + } + + /** + * Validate client exists and is authorized + */ + validateClient(clientId, secretKey = null, skipSecretValidation = false) { + const client = this.clients.get(clientId); + + logger.debug('validateClient', { + clientId, + clientExists: !!client, + hasSecretKey: !!secretKey, + skipSecretValidation, + clientSecretKey: client ? 
'[REDACTED]' : 'N/A' + }); + + if (!client) { + logger.debug('Client not found', { clientId }); + return { valid: false, reason: 'Client not found' }; + } + + // Skip secret key validation if explicitly requested (for new auth flow) + if (!skipSecretValidation && secretKey !== null && client.secretKey && client.secretKey !== secretKey) { + logger.warn('Secret key mismatch', { + clientId, + hasProvidedKey: !!secretKey, + hasStoredKey: !!client.secretKey + }); + return { valid: false, reason: 'Invalid secret key' }; + } + + logger.debug('Client validation successful', { clientId }); + return { valid: true }; + } + + /** + * Parse composite client ID to extract base client ID and tab ID + * Format: baseClientId:tabId + */ + parseCompositeClientId(compositeClientId) { + if (compositeClientId.includes(':')) { + const [baseClientId, tabId] = compositeClientId.split(':', 2); + return { baseClientId, tabId, isComposite: true }; + } + return { baseClientId: compositeClientId, tabId: null, isComposite: false }; + } + + /** + * Register a tab for a client + */ + registerTab(compositeClientId, connection, metadata = {}) { + const { baseClientId, tabId } = this.parseCompositeClientId(compositeClientId); + + if (!this.activeTabs.has(baseClientId)) { + this.activeTabs.set(baseClientId, new Set()); + } + + const tabs = this.activeTabs.get(baseClientId); + const tabInfo = { + tabId: tabId || 'default', + compositeClientId, + connection, + connectedAt: new Date().toISOString(), + ...metadata + }; + + // Remove existing tab with same ID if it exists + tabs.forEach(existingTab => { + if (existingTab.tabId === tabInfo.tabId) { + tabs.delete(existingTab); + } + }); + + tabs.add(tabInfo); + + logger.info('Tab registered', { + baseClientId, + tabId: tabInfo.tabId, + compositeClientId, + totalTabs: tabs.size + }); + + return tabInfo; + } + + /** + * Unregister a tab for a client + */ + unregisterTab(compositeClientId) { + const { baseClientId, tabId } = 
this.parseCompositeClientId(compositeClientId); + + if (!this.activeTabs.has(baseClientId)) { + return false; + } + + const tabs = this.activeTabs.get(baseClientId); + const targetTabId = tabId || 'default'; + + let removed = false; + tabs.forEach(tab => { + if (tab.tabId === targetTabId) { + tabs.delete(tab); + removed = true; + } + }); + + // Remove client entry if no tabs remain + if (tabs.size === 0) { + this.activeTabs.delete(baseClientId); + } + + if (removed) { + logger.info('Tab unregistered', { + baseClientId, + tabId: targetTabId, + compositeClientId, + remainingTabs: tabs.size + }); + } + + return removed; + } + + /** + * Get all active tabs for a client + */ + getClientTabs(baseClientId) { + const tabs = this.activeTabs.get(baseClientId); + return tabs ? Array.from(tabs) : []; + } + + /** + * Get all clients with their active tabs + */ + getAllClientsWithTabs() { + const result = []; + + for (const [baseClientId, tabs] of this.activeTabs) { + const client = this.clients.get(baseClientId); + if (client) { + result.push({ + ...client, + baseClientId, + activeTabs: Array.from(tabs), + tabCount: tabs.size + }); + } + } + + return result; + } + + /** + * Get a specific tab by composite client ID + */ + getTab(compositeClientId) { + const { baseClientId, tabId } = this.parseCompositeClientId(compositeClientId); + const tabs = this.activeTabs.get(baseClientId); + + if (!tabs) return null; + + const targetTabId = tabId || 'default'; + for (const tab of tabs) { + if (tab.tabId === targetTabId) { + return tab; + } + } + + return null; + } + + /** + * Get total tab count across all clients + */ + getTotalTabCount() { + let total = 0; + for (const tabs of this.activeTabs.values()) { + total += tabs.size; + } + return total; + } + + /** + * Cleanup stale tab references (called on disconnection) + */ + cleanupStaleTab(baseClientId, tabId) { + if (!this.activeTabs.has(baseClientId)) { + return; + } + + const tabs = this.activeTabs.get(baseClientId); + const 
targetTabId = tabId || 'default'; + + // Find and remove stale tab references + const staleTabs = Array.from(tabs).filter(tab => + tab.tabId === targetTabId && + (!tab.connection || tab.connection.ws.readyState !== tab.connection.ws.OPEN) + ); + + staleTabs.forEach(staleTab => { + tabs.delete(staleTab); + logger.debug('Cleaned up stale tab reference', { + baseClientId, + tabId: staleTab.tabId + }); + }); + + // Remove client entry if no tabs remain + if (tabs.size === 0) { + this.activeTabs.delete(baseClientId); + } + } + + /** + * Periodic cleanup of all stale tab connections + */ + cleanupStaleConnections() { + for (const [baseClientId, tabs] of this.activeTabs) { + const staleTabs = Array.from(tabs).filter(tab => + !tab.connection || tab.connection.ws.readyState !== tab.connection.ws.OPEN + ); + + staleTabs.forEach(staleTab => { + tabs.delete(staleTab); + logger.debug('Cleaned up stale connection', { + baseClientId, + tabId: staleTab.tabId + }); + }); + + // Remove client entry if no tabs remain + if (tabs.size === 0) { + this.activeTabs.delete(baseClientId); + } + } + } +} + +export { ClientManager }; \ No newline at end of file diff --git a/eval-server/nodejs/src/config.js b/eval-server/nodejs/src/config.js new file mode 100644 index 0000000..4bde4e5 --- /dev/null +++ b/eval-server/nodejs/src/config.js @@ -0,0 +1,78 @@ +import { config } from 'dotenv'; + +config(); + +export const CONFIG = { + server: { + port: parseInt(process.env.PORT) || 8080, + host: process.env.HOST || 'localhost' + }, + + llm: { + apiKey: process.env.OPENAI_API_KEY, + model: process.env.JUDGE_MODEL || 'gpt-4', + temperature: parseFloat(process.env.JUDGE_TEMPERATURE) || 0.1 + }, + + // LLM Provider Configuration for configure_llm API + providers: { + openai: { + apiKey: process.env.OPENAI_API_KEY + }, + litellm: { + endpoint: process.env.LITELLM_ENDPOINT, + apiKey: process.env.LITELLM_API_KEY + }, + groq: { + apiKey: process.env.GROQ_API_KEY + }, + openrouter: { + apiKey: 
process.env.OPENROUTER_API_KEY + } + }, + + // Default model configuration + defaults: { + provider: process.env.DEFAULT_PROVIDER || 'openai', + mainModel: process.env.DEFAULT_MAIN_MODEL || 'gpt-4', + miniModel: process.env.DEFAULT_MINI_MODEL || 'gpt-4-mini', + nanoModel: process.env.DEFAULT_NANO_MODEL || 'gpt-3.5-turbo' + }, + + logging: { + level: process.env.LOG_LEVEL || 'info', + dir: process.env.LOG_DIR || './logs' + }, + + rpc: { + timeout: parseInt(process.env.RPC_TIMEOUT) || 1500000, // 25 minutes default + maxConcurrentEvaluations: parseInt(process.env.MAX_CONCURRENT_EVALUATIONS) || 10 + }, + + security: { + authSecretKey: process.env.AUTH_SECRET_KEY + }, + + clients: { + dir: process.env.CLIENTS_DIR || './clients' + }, + + evals: { + dir: process.env.EVALS_DIR || './evals' + } +}; + +export function validateConfig(requireLLM = false) { + const errors = []; + + // Only require OpenAI API key if LLM judge is explicitly needed + if (requireLLM && !CONFIG.llm.apiKey) { + errors.push('OPENAI_API_KEY is required when using LLM judge'); + } + + if (CONFIG.server.port < 1 || CONFIG.server.port > 65535) { + errors.push('PORT must be between 1 and 65535'); + } + + return errors; +} \ No newline at end of file diff --git a/eval-server/nodejs/src/evaluator.js b/eval-server/nodejs/src/evaluator.js new file mode 100644 index 0000000..95ac14a --- /dev/null +++ b/eval-server/nodejs/src/evaluator.js @@ -0,0 +1,117 @@ +import OpenAI from 'openai'; +import { CONFIG } from './config.js'; +import logger from './logger.js'; + +export class LLMEvaluator { + constructor() { + if (!CONFIG.llm.apiKey) { + throw new Error('OpenAI API key is required'); + } + + this.openai = new OpenAI({ + apiKey: CONFIG.llm.apiKey + }); + } + + async evaluate(task, agentResponse) { + try { + const prompt = this.buildEvaluationPrompt(task, agentResponse); + + const completion = await this.openai.chat.completions.create({ + model: CONFIG.llm.model, + messages: [ + { + role: 'system', + content: 'You 
are an expert evaluator of AI agent responses. Provide objective, detailed evaluations.' + }, + { + role: 'user', + content: prompt + } + ], + temperature: CONFIG.llm.temperature, + max_tokens: 1000 + }); + + const evaluation = completion.choices[0].message.content; + const usage = completion.usage; + + logger.info('LLM evaluation completed', { + tokens_used: usage.total_tokens, + model: CONFIG.llm.model + }); + + return this.parseEvaluation(evaluation); + } catch (error) { + logger.error('LLM evaluation failed', { error: error.message }); + throw error; + } + } + + buildEvaluationPrompt(task, agentResponse) { + return `Please evaluate the following AI agent response to a given task. + +TASK: +${task} + +AGENT RESPONSE: +${agentResponse} + +Please evaluate the response on the following criteria and provide a JSON response: + +1. **Correctness**: Is the response factually accurate and correct? +2. **Completeness**: Does the response fully address the task? +3. **Clarity**: Is the response clear and well-structured? +4. **Relevance**: Is the response relevant to the task? +5. **Helpfulness**: How helpful is the response to the user? 
+ +Provide your evaluation in the following JSON format: +{ + "overall_score": , + "criteria_scores": { + "correctness": , + "completeness": , + "clarity": , + "relevance": , + "helpfulness": + }, + "reasoning": "", + "strengths": [""], + "weaknesses": [""], + "suggestions": [""] +}`; + } + + parseEvaluation(evaluationText) { + try { + // Try to extract JSON from the response + const jsonMatch = evaluationText.match(/\{[\s\S]*\}/); + if (jsonMatch) { + return JSON.parse(jsonMatch[0]); + } + + // If no JSON found, return a structured response with the raw text + return { + overall_score: null, + criteria_scores: {}, + reasoning: evaluationText, + strengths: [], + weaknesses: [], + suggestions: [], + raw_evaluation: evaluationText + }; + } catch (error) { + logger.warn('Failed to parse evaluation JSON', { error: error.message }); + return { + overall_score: null, + criteria_scores: {}, + reasoning: evaluationText, + strengths: [], + weaknesses: [], + suggestions: [], + raw_evaluation: evaluationText, + parse_error: error.message + }; + } + } +} \ No newline at end of file diff --git a/eval-server/nodejs/src/lib/EvalServer.js b/eval-server/nodejs/src/lib/EvalServer.js new file mode 100644 index 0000000..d174c7a --- /dev/null +++ b/eval-server/nodejs/src/lib/EvalServer.js @@ -0,0 +1,923 @@ +// Copyright 2025 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +import { EventEmitter } from 'events'; +import { v4 as uuidv4 } from 'uuid'; +import { WebSocketServer } from 'ws'; + +import { ClientManager } from '../client-manager.js'; +import { CONFIG, validateConfig } from '../config.js'; +import logger, { logConnection, logEvaluation } from '../logger.js'; +import { RpcClient } from '../rpc-client.js'; +import { EvaluationLoader } from './EvaluationLoader.js'; + +/** + * EvalServer - A library for programmatically managing evaluation servers + * + * Example usage: + * ```js + * const server = new EvalServer({ + * authKey: 'your-secret-key', + * host: '127.0.0.1', + * port: 8080 + * }); + * + * server.onConnect(client => { + * console.log(`Client connected: ${client.id}`); + * + * client.evaluate({ + * id: "test_eval", + * name: "Bloomberg Eval", + * description: "Test Eval for Bloomberg website", + * input: { + * objective: "Navigate to Bloomberg, summarize and return sentiment of the latest news." + * } + * }).then(response => { + * console.log('Evaluation response:', response); + * }); + * }); + * + * server.start(); + * ``` + */ +export class EvalServer extends EventEmitter { + constructor(options = {}) { + super(); + + // Apply configuration options + this.config = { + host: options.host || CONFIG.server.host, + port: options.port || CONFIG.server.port, + authKey: options.authKey || null, + clientsDir: options.clientsDir || './clients', + evalsDir: options.evalsDir || './evals', + ...options + }; + + // Internal state + this.connectedClients = new Map(); + this.clientManager = new ClientManager(this.config.clientsDir, this.config.evalsDir); + this.evaluationLoader = new EvaluationLoader(this.config.evalsDir); + this.judge = null; // Judge is optional - can be set later + this.wss = null; + this.isRunning = false; + + // Bind methods + this.handleConnection = this.handleConnection.bind(this); + } + + /** + * Start the evaluation server + */ + async start() { + if (this.isRunning) { + throw new Error('Server is already 
running'); + } + + // Validate configuration - only require LLM if judge is configured + const configErrors = validateConfig(!!this.judge); + if (configErrors.length > 0) { + throw new Error(`Configuration errors: ${configErrors.join(', ')}`); + } + + // Create WebSocket server + this.wss = new WebSocketServer({ + port: this.config.port, + host: this.config.host + }); + + this.wss.on('connection', this.handleConnection); + this.wss.on('error', error => { + logger.error('WebSocket server error', { error: error.message }); + this.emit('error', error); + }); + + this.isRunning = true; + logger.info(`Evaluation server started on ws://${this.config.host}:${this.config.port}`); + this.emit('started', { host: this.config.host, port: this.config.port }); + + return this; + } + + /** + * Stop the evaluation server + */ + async stop() { + if (!this.isRunning) { + return; + } + + if (this.wss) { + this.wss.close(); + this.wss = null; + } + + // Close all client connections + for (const [clientId, connection] of this.connectedClients) { + connection.rpcClient.cleanup(); + if (connection.ws.readyState === connection.ws.OPEN) { + connection.ws.close(); + } + } + this.connectedClients.clear(); + + this.isRunning = false; + logger.info('Evaluation server stopped'); + this.emit('stopped'); + } + + /** + * Register a callback for when clients connect + * @param {Function} callback - Called with a ClientProxy instance + */ + onConnect(callback) { + this.on('clientConnected', callback); + return this; + } + + /** + * Register a callback for when clients disconnect + * @param {Function} callback - Called with client info + */ + onDisconnect(callback) { + this.on('clientDisconnected', callback); + return this; + } + + /** + * Set the judge for evaluations (optional) + * @param {Judge} judge - Judge instance for evaluation validation + */ + setJudge(judge) { + // If server is already running, validate LLM config when setting judge + if (this.isRunning) { + const configErrors = 
validateConfig(true); + if (configErrors.length > 0) { + throw new Error(`Cannot set judge: ${configErrors.join(', ')}`); + } + } + + this.judge = judge; + return this; + } + + + /** + * Get current server status + */ + getStatus() { + const connections = Array.from(this.connectedClients.values()); + const readyClients = connections.filter(client => client.ready).length; + const uniqueBaseClients = new Set(connections.map(c => c.baseClientId).filter(Boolean)).size; + + return { + isRunning: this.isRunning, + connectedClients: this.connectedClients.size, + uniqueBaseClients: uniqueBaseClients, + totalTabs: this.clientManager.getTotalTabCount(), + readyClients: readyClients, + host: this.config.host, + port: this.config.port + }; + } + + /** + * Load evaluations from YAML files + */ + async loadEvaluations(evalsDir = './evals') { + return this.evaluationLoader.loadFromDirectory(evalsDir); + } + + /** + * Get all available evaluations + */ + getEvaluations() { + return this.evaluationLoader.getAllEvaluations(); + } + + /** + * Get the client manager instance + */ + getClientManager() { + return this.clientManager; + } + + /** + * Handle new WebSocket connections + */ + handleConnection(ws, request) { + const connectionId = uuidv4(); + const connection = { + id: connectionId, + ws, + rpcClient: new RpcClient(), + connectedAt: new Date().toISOString(), + remoteAddress: request.socket.remoteAddress, + registered: false, + clientId: null + }; + + this.connectedClients.set(connectionId, connection); + + logConnection({ + event: 'connected', + connectionId, + remoteAddress: connection.remoteAddress, + totalConnections: this.connectedClients.size + }); + + ws.on('message', message => { + this.handleMessage(connection, message); + }); + + ws.on('close', () => { + this.handleDisconnection(connection); + }); + + ws.on('error', error => { + logger.error('WebSocket connection error', { + connectionId: connection.id, + clientId: connection.clientId, + error: error.message + }); + 
}); + + // Send welcome message + this.sendMessage(ws, { + type: 'welcome', + serverId: 'server-001', + version: '1.0.0', + timestamp: new Date().toISOString() + }); + } + + /** + * Handle incoming messages from clients + */ + async handleMessage(connection, message) { + try { + const data = JSON.parse(message); + + // Handle RPC responses + if (data.jsonrpc === '2.0' && (data.result || data.error) && data.id) { + if (connection.rpcClient.handleResponse(message)) { + return; + } + logger.debug('RPC response could not be handled', { + connectionId: connection.id, + clientId: connection.clientId, + id: data.id + }); + return; + } + + // Handle RPC requests from client to server + if (data.jsonrpc === '2.0' && data.method && data.id) { + await this.handleRpcRequest(connection, data); + return; + } + + // Handle other message types + switch (data.type) { + case 'register': + await this.handleRegistration(connection, data); + break; + case 'ping': + this.sendMessage(connection.ws, { + type: 'pong', + timestamp: new Date().toISOString() + }); + break; + case 'ready': + if (!connection.registered) { + logger.warn('Received ready signal from unregistered client', { + connectionId: connection.id + }); + return; + } + connection.ready = true; + logger.info('Client ready for evaluations', { + clientId: connection.clientId + }); + + // Create client proxy and emit connection event + const clientProxy = new ClientProxy(connection, this); + this.emit('clientConnected', clientProxy); + break; + case 'status': + this.handleStatusUpdate(connection, data); + break; + case 'auth_verify': + this.handleAuthVerification(connection, data); + break; + default: + logger.warn('Unknown message type', { + connectionId: connection.id, + clientId: connection.clientId, + type: data.type + }); + } + } catch (error) { + logger.warn('Failed to parse message', { + connectionId: connection.id, + error: error.message + }); + } + } + + /** + * Handle RPC requests from client to server + */ + async 
handleRpcRequest(connection, request) { + try { + const { method, params, id } = request; + + logger.info('Received RPC request', { + connectionId: connection.id, + clientId: connection.clientId, + method, + requestId: id + }); + + let result = null; + + switch (method) { + case 'configure_llm': + result = await this.handleConfigureLLM(connection, params); + break; + default: + // JSON-RPC: Method not found + this.sendMessage(connection.ws, { + jsonrpc: '2.0', + error: { + code: -32601, + message: `Method not found: ${method}` + }, + id + }); + return; + } + + // Send success response + this.sendMessage(connection.ws, { + jsonrpc: '2.0', + result, + id + }); + + } catch (error) { + logger.error('RPC request failed', { + connectionId: connection.id, + clientId: connection.clientId, + method: request.method, + requestId: request.id, + error: error.message + }); + + // Send error response + this.sendMessage(connection.ws, { + jsonrpc: '2.0', + error: { + code: -32603, // Internal error + message: error.message + }, + id: request.id + }); + } + } + + /** + * Handle configure_llm RPC method + */ + async handleConfigureLLM(connection, params) { + if (!connection.registered) { + throw new Error('Client must be registered before configuring LLM'); + } + + const { provider, apiKey, endpoint, models, partial = false } = params; + + // Validate inputs + const supportedProviders = ['openai', 'litellm', 'groq', 'openrouter']; + if (partial) { + // For partial updates, validate only provided fields + if (provider && !supportedProviders.includes(provider)) { + throw new Error(`Unsupported provider: ${provider}. Supported providers: ${supportedProviders.join(', ')}`); + } + if (models && models.main === '') { + throw new Error('Main model cannot be empty'); + } + } else { + // For full updates, require provider and main model + if (!provider || !supportedProviders.includes(provider)) { + throw new Error(`Unsupported or missing provider: ${provider ?? '(none)'}. 
Supported providers: ${supportedProviders.join(', ')}`); + } + if (!models || !models.main) { + throw new Error('Main model is required'); + } + } + + // Store configuration for this client connection + if (!connection.llmConfig) { + connection.llmConfig = {}; + } + + // Apply configuration (full or partial update) + if (partial && connection.llmConfig) { + // Partial update - merge with existing config + connection.llmConfig = { + ...connection.llmConfig, + provider: provider || connection.llmConfig.provider, + apiKey: apiKey || connection.llmConfig.apiKey, + endpoint: endpoint || connection.llmConfig.endpoint, + models: { + ...connection.llmConfig.models, + ...models + } + }; + } else { + // Full update - replace entire config + connection.llmConfig = { + provider, + apiKey: apiKey || CONFIG.providers[provider]?.apiKey, + endpoint: endpoint || CONFIG.providers[provider]?.endpoint, + models: { + main: models.main, + mini: models.mini || models.main, + nano: models.nano || models.mini || models.main + } + }; + } + + logger.info('LLM configuration updated', { + clientId: connection.clientId, + provider: connection.llmConfig.provider, + models: connection.llmConfig.models, + hasApiKey: !!connection.llmConfig.apiKey, + hasEndpoint: !!connection.llmConfig.endpoint + }); + + return { + status: 'success', + message: 'LLM configuration updated successfully', + appliedConfig: { + provider: connection.llmConfig.provider, + models: connection.llmConfig.models + } + }; + } + + /** + * Handle client registration + */ + async handleRegistration(connection, data) { + try { + const { clientId, secretKey, capabilities } = data; + const { baseClientId, tabId, isComposite } = this.clientManager.parseCompositeClientId(clientId); + + logger.info('Registration attempt', { + clientId, + baseClientId, + tabId: tabId || 'default', + isComposite, + hasSecretKey: !!secretKey + }); + + // Check if base client exists + const validation = this.clientManager.validateClient(baseClientId, null, 
true); + if (!validation.valid) { + if (validation.reason === 'Client not found') { + // Auto-create new client configuration + try { + logger.info('Auto-creating new client configuration', { baseClientId, clientId }); + await this.clientManager.createClientWithId(baseClientId, `DevTools Client ${baseClientId.substring(0, 8)}`, 'hello'); + + this.sendMessage(connection.ws, { + type: 'registration_ack', + clientId, + status: 'rejected', + reason: 'New client created. Please reconnect to complete registration.', + newClient: true + }); + return; + } catch (error) { + this.sendMessage(connection.ws, { + type: 'registration_ack', + clientId, + status: 'rejected', + reason: `Failed to create client configuration: ${error.message}` + }); + return; + } + } else { + this.sendMessage(connection.ws, { + type: 'registration_ack', + clientId, + status: 'rejected', + reason: validation.reason + }); + return; + } + } + + // Get client info + const client = this.clientManager.getClient(baseClientId); + if (!client) { + this.sendMessage(connection.ws, { + type: 'registration_ack', + clientId, + status: 'rejected', + reason: 'Client configuration not found' + }); + return; + } + + // Send server's secret key to client for verification + this.sendMessage(connection.ws, { + type: 'registration_ack', + clientId, + status: 'auth_required', + serverSecretKey: client.secretKey || '', + message: 'Please verify secret key' + }); + + connection.clientId = clientId; + connection.capabilities = capabilities; + connection.awaitingAuth = true; + + } catch (error) { + logger.error('Registration error', { error: error.message }); + this.sendMessage(connection.ws, { + type: 'registration_ack', + clientId: data.clientId, + status: 'rejected', + reason: error.message + }); + } + } + + /** + * Handle auth verification + */ + handleAuthVerification(connection, data) { + if (!connection.awaitingAuth) { + return; + } + + const { clientId, verified } = data; + + if (verified) { + const { baseClientId, 
tabId, isComposite } = this.clientManager.parseCompositeClientId(clientId); + + const result = this.clientManager.registerClient(baseClientId, '', connection.capabilities, true); + + connection.registered = true; + connection.awaitingAuth = false; + connection.compositeClientId = clientId; + connection.baseClientId = baseClientId; + connection.tabId = tabId; + + // Register tab with client manager + this.clientManager.registerTab(clientId, connection, { + remoteAddress: connection.remoteAddress, + userAgent: connection.userAgent || 'unknown' + }); + + // Move connection to use composite clientId as key + this.connectedClients.delete(connection.id); + this.connectedClients.set(clientId, connection); + + this.sendMessage(connection.ws, { + type: 'registration_ack', + clientId, + status: 'accepted', + message: result.clientName ? `Welcome ${result.clientName}` : 'Client authenticated successfully', + evaluationsCount: result.evaluationsCount, + tabId: tabId, + isComposite: isComposite + }); + + logger.info('Client authenticated and registered', { + clientId, + baseClientId, + tabId: tabId || 'default', + isComposite + }); + } else { + this.sendMessage(connection.ws, { + type: 'registration_ack', + clientId, + status: 'rejected', + reason: 'Secret key verification failed' + }); + + connection.ws.close(1008, 'Authentication failed'); + } + } + + /** + * Handle status updates + */ + handleStatusUpdate(connection, data) { + if (!connection.registered) return; + + const { evaluationId, status, progress, message } = data; + + logger.info('Evaluation status update', { + clientId: connection.clientId, + evaluationId, + status, + progress, + message + }); + + this.clientManager.updateEvaluationStatus( + connection.clientId, + evaluationId, + status + ); + } + + /** + * Handle client disconnection and cleanup stale tab references + */ + handleDisconnection(connection) { + connection.rpcClient.cleanup(); + + // Clean up stale tab references + if (connection.registered && 
connection.compositeClientId) { + this.clientManager.unregisterTab(connection.compositeClientId); + this.connectedClients.delete(connection.compositeClientId); + + // Additional cleanup: ensure tab is removed from activeTabs + const { baseClientId } = this.clientManager.parseCompositeClientId(connection.compositeClientId); + this.clientManager.cleanupStaleTab(baseClientId, connection.tabId); + } else if (connection.clientId) { + this.connectedClients.delete(connection.clientId); + } else { + this.connectedClients.delete(connection.id); + } + + logConnection({ + event: 'disconnected', + connectionId: connection.id, + clientId: connection.compositeClientId || connection.clientId, + baseClientId: connection.baseClientId, + tabId: connection.tabId, + totalConnections: this.connectedClients.size + }); + + this.emit('clientDisconnected', { + clientId: connection.compositeClientId || connection.clientId, + baseClientId: connection.baseClientId, + tabId: connection.tabId + }); + } + + /** + * Send message to WebSocket client + */ + sendMessage(ws, data) { + if (ws.readyState === ws.OPEN) { + try { + ws.send(JSON.stringify(data)); + } catch (error) { + logger.error('Failed to send WebSocket message', { + error: error.message, + messageType: data.type + }); + } + } else { + logger.warn('Cannot send message, WebSocket not open', { + readyState: ws.readyState, + messageType: data.type + }); + } + } + + /** + * Execute evaluation on a specific client + */ + async executeEvaluation(connection, evaluation) { + const startTime = Date.now(); + const rpcId = `rpc-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`; + + try { + logger.info('Starting evaluation', { + clientId: connection.clientId, + evaluationId: evaluation.id, + tool: evaluation.tool + }); + + // Update status to running + this.clientManager.updateEvaluationStatus( + connection.clientId, + evaluation.id, + 'running' + ); + + // Prepare model configuration - use client config if available, otherwise 
evaluation config, otherwise defaults + let modelConfig = evaluation.model || {}; + + if (connection.llmConfig) { + // New nested format: separate config objects for each model tier + modelConfig = { + main_model: { + provider: connection.llmConfig.provider, + model: connection.llmConfig.models.main, + api_key: connection.llmConfig.apiKey, + endpoint: connection.llmConfig.endpoint + }, + mini_model: { + provider: connection.llmConfig.provider, + model: connection.llmConfig.models.mini, + api_key: connection.llmConfig.apiKey, + endpoint: connection.llmConfig.endpoint + }, + nano_model: { + provider: connection.llmConfig.provider, + model: connection.llmConfig.models.nano, + api_key: connection.llmConfig.apiKey, + endpoint: connection.llmConfig.endpoint + }, + // Include any evaluation-specific overrides + ...modelConfig + }; + } + + // Prepare RPC request + const rpcRequest = { + jsonrpc: '2.0', + method: 'evaluate', + params: { + evaluationId: evaluation.id, + name: evaluation.name, + url: evaluation.target?.url || evaluation.url, + tool: evaluation.tool, + input: evaluation.input, + model: modelConfig, + timeout: evaluation.timeout || 30000, + metadata: { + tags: evaluation.metadata?.tags || [], + retries: evaluation.settings?.retry_policy?.max_retries || 0 + } + }, + id: rpcId + }; + + // Send RPC request + const response = await connection.rpcClient.callMethod( + connection.ws, + 'evaluate', + rpcRequest.params, + evaluation.timeout || 45000 + ); + + // Validate response if needed and judge is available + let validationResult = null; + if (evaluation.validation && this.judge) { + validationResult = await this.validateResponse(response, evaluation); + } + + // Update evaluation status + this.clientManager.updateEvaluationStatus( + connection.clientId, + evaluation.id, + 'completed', + { + response, + validation: validationResult, + duration: Date.now() - startTime + } + ); + + // Log evaluation + logEvaluation({ + evaluationId: evaluation.id, + clientId: 
connection.clientId, + name: evaluation.name, + tool: evaluation.tool, + response, + validation: validationResult, + timestamp: new Date().toISOString(), + duration: Date.now() - startTime + }); + + return response; + + } catch (error) { + logger.error('Evaluation failed', { + clientId: connection.clientId, + evaluationId: evaluation.id, + error: error.message + }); + + this.clientManager.updateEvaluationStatus( + connection.clientId, + evaluation.id, + 'failed', + { + error: error.message, + duration: Date.now() - startTime + } + ); + + throw error; + } + } + + /** + * Validate response using configured judge + */ + async validateResponse(response, evaluation) { + if (!this.judge) { + logger.warn('Validation requested but no judge configured'); + return { + type: 'no-judge', + result: { message: 'No judge configured for validation' }, + passed: true // Assume passed if no judge + }; + } + + const validation = evaluation.validation; + + if (validation.type === 'llm-judge' || validation.type === 'hybrid') { + const llmConfig = validation.llm_judge || validation.llmJudge; + const criteria = llmConfig?.criteria || []; + const task = `${evaluation.name} - ${evaluation.description || ''}`; + + const judgeResult = await this.judge.evaluate( + task, + JSON.stringify(response.output || response), + { + criteria, + model: llmConfig?.model + } + ); + + return { + type: 'llm-judge', + result: judgeResult, + passed: judgeResult.score >= 0.7 + }; + } + + return null; + } +} + +/** + * ClientProxy - Provides a convenient interface for interacting with connected clients + */ +class ClientProxy { + constructor(connection, server) { + this.connection = connection; + this.server = server; + this.id = connection.compositeClientId || connection.clientId; + this.tabId = connection.tabId; + this.baseClientId = connection.baseClientId; + } + + /** + * Execute an evaluation on this client + */ + async evaluate(evaluation) { + // Ensure evaluation has required fields + const fullEvaluation 
= { + id: evaluation.id || `eval-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`, + name: evaluation.name || 'Dynamic Evaluation', + description: evaluation.description || 'Programmatically created evaluation', + enabled: true, + tool: evaluation.tool || 'chat', + timeout: evaluation.timeout || 45000, + input: evaluation.input || {}, + model: evaluation.model || {}, + validation: evaluation.validation || { type: 'none' }, + metadata: evaluation.metadata || { tags: ['api', 'dynamic'] }, + ...evaluation + }; + + return this.server.executeEvaluation(this.connection, fullEvaluation); + } + + /** + * Get client information + */ + getInfo() { + return { + id: this.id, + tabId: this.tabId, + baseClientId: this.baseClientId, + connectedAt: this.connection.connectedAt, + remoteAddress: this.connection.remoteAddress, + capabilities: this.connection.capabilities + }; + } + + /** + * Send a custom message to the client + */ + sendMessage(data) { + this.server.sendMessage(this.connection.ws, data); + } +} \ No newline at end of file diff --git a/eval-server/nodejs/src/lib/EvaluationLoader.js b/eval-server/nodejs/src/lib/EvaluationLoader.js new file mode 100644 index 0000000..8f85459 --- /dev/null +++ b/eval-server/nodejs/src/lib/EvaluationLoader.js @@ -0,0 +1,448 @@ +// Copyright 2025 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +import fs from 'fs'; +import path from 'path'; +import yaml from 'js-yaml'; +import logger from '../logger.js'; + +/** + * EvaluationLoader - Handles loading and managing evaluations from YAML files + * + * Example usage: + * ```js + * const loader = new EvaluationLoader('./evals'); + * await loader.loadFromDirectory('./evals'); + * + * const evaluations = loader.getAllEvaluations(); + * const filtered = loader.getEvaluationsByCategory('action-agent'); + * const specific = loader.getEvaluationById('a11y-001'); + * ``` + */ +export class EvaluationLoader { + constructor(evalsDir = './evals') { + this.evalsDir = path.resolve(evalsDir); + this.evaluations = new Map(); // evaluationId -> evaluation + this.categories = new Map(); // category -> evaluations[] + this.configDefaults = null; + + // Ensure directory exists + if (!fs.existsSync(this.evalsDir)) { + fs.mkdirSync(this.evalsDir, { recursive: true }); + } + + this.loadConfigDefaults(); + } + + /** + * Load default model configuration from config.yaml + */ + loadConfigDefaults() { + try { + const configPath = path.resolve(this.evalsDir, 'config.yaml'); + if (fs.existsSync(configPath)) { + const configContent = fs.readFileSync(configPath, 'utf8'); + this.configDefaults = yaml.load(configContent); + logger.info('EvaluationLoader: Loaded config.yaml defaults', this.configDefaults); + } else { + // Don't warn about missing config.yaml - it's optional + this.configDefaults = null; + } + } catch (error) { + logger.error('EvaluationLoader: Failed to load config.yaml:', error); + this.configDefaults = null; + } + } + + /** + * Apply model precedence logic + * API calls OR test YAML models override config.yaml fallback + */ + applyModelPrecedence(evaluation, apiModelOverride = null) { + if (apiModelOverride) { + return { + ...(this.configDefaults?.model || {}), + ...apiModelOverride + }; + } + + const testModel = evaluation.model; + if (testModel && Object.keys(testModel).length > 0) { + return { + 
...(this.configDefaults?.model || {}), + ...testModel + }; + } + + return this.configDefaults?.model || {}; + } + + /** + * Load all evaluations from the specified directory + */ + async loadFromDirectory(evalsDir = this.evalsDir) { + try { + this.evalsDir = path.resolve(evalsDir); + + // Clear existing evaluations + this.evaluations.clear(); + this.categories.clear(); + + // Reload config defaults + this.loadConfigDefaults(); + + // Find all category directories + const categories = fs.readdirSync(this.evalsDir) + .filter(dir => { + const fullPath = path.join(this.evalsDir, dir); + return fs.statSync(fullPath).isDirectory(); + }); + + let totalEvaluations = 0; + + for (const category of categories) { + const categoryDir = path.join(this.evalsDir, category); + const evalFiles = fs.readdirSync(categoryDir) + .filter(f => f.endsWith('.yaml') || f.endsWith('.yml')); + + const categoryEvaluations = []; + + for (const file of evalFiles) { + try { + const evalPath = path.join(categoryDir, file); + const evaluation = await this.loadEvaluationFile(evalPath, category); + + if (evaluation && evaluation.enabled !== false) { + this.evaluations.set(evaluation.id, evaluation); + categoryEvaluations.push(evaluation); + totalEvaluations++; + } + } catch (error) { + logger.error(`EvaluationLoader: Failed to load evaluation ${file}:`, error); + } + } + + if (categoryEvaluations.length > 0) { + this.categories.set(category, categoryEvaluations); + } + } + + logger.info(`EvaluationLoader: Loaded ${totalEvaluations} evaluations from ${categories.length} categories`); + return { totalEvaluations, categories: categories.length }; + + } catch (error) { + logger.error('EvaluationLoader: Failed to load evaluations:', error); + throw error; + } + } + + /** + * Load a specific evaluation file + */ + async loadEvaluationFile(filePath, category) { + try { + const yamlContent = fs.readFileSync(filePath, 'utf8'); + const evaluation = yaml.load(yamlContent); + + if (!evaluation || !evaluation.id) 
{ + throw new Error('Evaluation must have an id field'); + } + + // Apply model precedence + const resolvedModel = this.applyModelPrecedence(evaluation); + + // Enhance evaluation with metadata + const enhancedEvaluation = { + ...evaluation, + model: resolvedModel, + category, + filePath, + status: 'pending', + loadedAt: new Date().toISOString() + }; + + // Validate required fields + this.validateEvaluation(enhancedEvaluation); + + return enhancedEvaluation; + + } catch (error) { + logger.error(`EvaluationLoader: Failed to load evaluation file ${filePath}:`, error); + throw error; + } + } + + /** + * Validate evaluation structure + */ + validateEvaluation(evaluation) { + const required = ['id', 'name', 'tool']; + + for (const field of required) { + if (!evaluation[field]) { + throw new Error(`Evaluation missing required field: ${field}`); + } + } + + // Validate tool is supported + const supportedTools = [ + 'action_agent', + 'research_agent', + 'schema_extractor', + 'streamlined_schema_extractor', + 'screenshot_verification', + 'web_task_agent', + 'chat' + ]; + + if (!supportedTools.includes(evaluation.tool)) { + logger.warn(`EvaluationLoader: Unknown tool type: ${evaluation.tool}`); + } + + return true; + } + + /** + * Get all loaded evaluations + */ + getAllEvaluations() { + return Array.from(this.evaluations.values()); + } + + /** + * Get evaluations by category + */ + getEvaluationsByCategory(category) { + return this.categories.get(category) || []; + } + + /** + * Get all available categories + */ + getCategories() { + return Array.from(this.categories.keys()); + } + + /** + * Get evaluation by ID + */ + getEvaluationById(evaluationId) { + return this.evaluations.get(evaluationId); + } + + /** + * Filter evaluations by criteria + */ + filterEvaluations(criteria = {}) { + let evaluations = this.getAllEvaluations(); + + // Filter by category + if (criteria.category) { + evaluations = evaluations.filter(e => e.category === criteria.category); + } + + // Filter 
by tool + if (criteria.tool) { + evaluations = evaluations.filter(e => e.tool === criteria.tool); + } + + // Filter by tags + if (criteria.tags && criteria.tags.length > 0) { + evaluations = evaluations.filter(e => { + const evalTags = e.metadata?.tags || []; + return criteria.tags.some(tag => evalTags.includes(tag)); + }); + } + + // Filter by enabled status + if (criteria.enabled !== undefined) { + evaluations = evaluations.filter(e => e.enabled === criteria.enabled); + } + + // Filter by priority + if (criteria.priority) { + evaluations = evaluations.filter(e => e.metadata?.priority === criteria.priority); + } + + return evaluations; + } + + /** + * Get evaluation statistics + */ + getStatistics() { + const evaluations = this.getAllEvaluations(); + const stats = { + total: evaluations.length, + byCategory: {}, + byTool: {}, + byStatus: {}, + enabled: 0, + disabled: 0 + }; + + for (const evaluation of evaluations) { + // Count by category + const category = evaluation.category; + stats.byCategory[category] = (stats.byCategory[category] || 0) + 1; + + // Count by tool + const tool = evaluation.tool; + stats.byTool[tool] = (stats.byTool[tool] || 0) + 1; + + // Count by status + const status = evaluation.status || 'pending'; + stats.byStatus[status] = (stats.byStatus[status] || 0) + 1; + + // Count enabled/disabled + if (evaluation.enabled !== false) { + stats.enabled++; + } else { + stats.disabled++; + } + } + + return stats; + } + + /** + * Reload evaluations from disk + */ + async reload() { + return this.loadFromDirectory(this.evalsDir); + } + + /** + * Create a new evaluation programmatically + */ + createEvaluation(evaluationData) { + const evaluation = { + id: evaluationData.id || `eval-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`, + name: evaluationData.name || 'Untitled Evaluation', + description: evaluationData.description || '', + enabled: evaluationData.enabled !== false, + tool: evaluationData.tool || 'chat', + timeout: 
evaluationData.timeout || 45000, + input: evaluationData.input || {}, + model: this.applyModelPrecedence(evaluationData, evaluationData.model), + validation: evaluationData.validation || { type: 'none' }, + metadata: { + tags: ['programmatic'], + priority: 'medium', + ...evaluationData.metadata + }, + category: evaluationData.category || 'programmatic', + status: 'pending', + loadedAt: new Date().toISOString(), + ...evaluationData + }; + + // Validate the evaluation + this.validateEvaluation(evaluation); + + // Store the evaluation + this.evaluations.set(evaluation.id, evaluation); + + // Add to category + const category = evaluation.category; + if (!this.categories.has(category)) { + this.categories.set(category, []); + } + this.categories.get(category).push(evaluation); + + logger.info(`EvaluationLoader: Created evaluation ${evaluation.id} in category ${category}`); + return evaluation; + } + + /** + * Remove an evaluation + */ + removeEvaluation(evaluationId) { + const evaluation = this.evaluations.get(evaluationId); + if (!evaluation) { + return false; + } + + // Remove from main map + this.evaluations.delete(evaluationId); + + // Remove from category + const category = evaluation.category; + if (this.categories.has(category)) { + const categoryEvals = this.categories.get(category); + const index = categoryEvals.findIndex(e => e.id === evaluationId); + if (index !== -1) { + categoryEvals.splice(index, 1); + + // Remove category if empty + if (categoryEvals.length === 0) { + this.categories.delete(category); + } + } + } + + logger.info(`EvaluationLoader: Removed evaluation ${evaluationId}`); + return true; + } + + /** + * Update an existing evaluation + */ + updateEvaluation(evaluationId, updates) { + const evaluation = this.evaluations.get(evaluationId); + if (!evaluation) { + throw new Error(`Evaluation ${evaluationId} not found`); + } + + // Apply updates + const updatedEvaluation = { + ...evaluation, + ...updates, + id: evaluationId, // Ensure ID doesn't 
change + updatedAt: new Date().toISOString() + }; + + // Validate updated evaluation + this.validateEvaluation(updatedEvaluation); + + // Update in storage + this.evaluations.set(evaluationId, updatedEvaluation); + + // Update in category if category changed + if (updates.category && updates.category !== evaluation.category) { + // Remove from old category + const oldCategory = evaluation.category; + if (this.categories.has(oldCategory)) { + const oldCategoryEvals = this.categories.get(oldCategory); + const index = oldCategoryEvals.findIndex(e => e.id === evaluationId); + if (index !== -1) { + oldCategoryEvals.splice(index, 1); + if (oldCategoryEvals.length === 0) { + this.categories.delete(oldCategory); + } + } + } + + // Add to new category + const newCategory = updates.category; + if (!this.categories.has(newCategory)) { + this.categories.set(newCategory, []); + } + this.categories.get(newCategory).push(updatedEvaluation); + } else { + // Update existing entry in category + const category = evaluation.category; + if (this.categories.has(category)) { + const categoryEvals = this.categories.get(category); + const index = categoryEvals.findIndex(e => e.id === evaluationId); + if (index !== -1) { + categoryEvals[index] = updatedEvaluation; + } + } + } + + logger.info(`EvaluationLoader: Updated evaluation ${evaluationId}`); + return updatedEvaluation; + } +} \ No newline at end of file diff --git a/eval-server/nodejs/src/lib/EvaluationStack.js b/eval-server/nodejs/src/lib/EvaluationStack.js new file mode 100644 index 0000000..04d7b36 --- /dev/null +++ b/eval-server/nodejs/src/lib/EvaluationStack.js @@ -0,0 +1,85 @@ +// Copyright 2025 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +/** + * EvaluationStack - A simple stack-like structure for managing evaluations + * + * Provides LIFO (Last In, First Out) access to evaluation objects. 
/**
 * Useful for distributing different evaluations across multiple client
 * connections: the most recently pushed evaluation is served first.
 */
export class EvaluationStack {
  constructor() {
    // Backing array; index length-1 is the top of the stack.
    this.evaluations = [];
  }

  /**
   * Add an evaluation to the top of the stack.
   * @param {Object} evaluation - The evaluation object to add
   * @throws {Error} When the value is not an object or lacks a required field.
   */
  push(evaluation) {
    if (!evaluation || typeof evaluation !== 'object') {
      throw new Error('Evaluation must be a valid object');
    }

    // Reject evaluations missing any of the mandatory fields.
    for (const field of ['id', 'name', 'tool', 'input']) {
      if (!evaluation[field]) {
        throw new Error(`Evaluation missing required field: ${field}`);
      }
    }

    this.evaluations.push(evaluation);
  }

  /**
   * Remove and return the evaluation from the top of the stack.
   * @returns {Object|null} The evaluation object, or null if stack is empty
   */
  pop() {
    const top = this.evaluations.pop();
    return top === undefined ? null : top;
  }

  /**
   * Check if the stack is empty.
   * @returns {boolean} True if stack has no evaluations
   */
  isEmpty() {
    return this.size() === 0;
  }

  /**
   * Get the number of evaluations in the stack.
   * @returns {number} The stack size
   */
  size() {
    return this.evaluations.length;
  }

  /**
   * Peek at the top evaluation without removing it.
   * @returns {Object|null} The top evaluation object, or null if stack is empty
   */
  peek() {
    const n = this.evaluations.length;
    return n > 0 ? this.evaluations[n - 1] : null;
  }

  /**
   * Clear all evaluations from the stack.
   */
  clear() {
    this.evaluations = [];
  }

  /**
   * Get a copy of all evaluations in the stack (top to bottom).
   * @returns {Array} Array of evaluation objects
   */
  toArray() {
    return this.evaluations.slice().reverse();
  }
}
// Copyright 2025 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import { APIServer } from '../api-server.js';

/**
 * HTTPWrapper - Optional HTTP API wrapper for EvalServer
 *
 * Provides an HTTP REST API on top of the core EvalServer, following the
 * same pattern as the CLI wrapper.
 *
 * Example usage:
 * ```js
 * import { EvalServer } from './EvalServer.js';
 * import { HTTPWrapper } from './HTTPWrapper.js';
 *
 * const evalServer = new EvalServer({ port: 8080 });
 * const httpWrapper = new HTTPWrapper(evalServer, { port: 8081 });
 *
 * await evalServer.start();
 * await httpWrapper.start();
 * ```
 */
export class HTTPWrapper {
  constructor(evalServer, options = {}) {
    this.evalServer = evalServer;
    // Defaults first; explicit options win via the trailing spread.
    this.config = {
      port: options.port || 8081,
      host: options.host || 'localhost',
      ...options
    };

    this.apiServer = new APIServer(evalServer, this.config.port);
    this.isRunning = false;
  }

  /**
   * Start the HTTP API server. The underlying EvalServer must already be
   * running; starting twice is an error.
   * @returns {Promise<HTTPWrapper>} this, for chaining.
   */
  async start() {
    if (this.isRunning) {
      throw new Error('HTTP wrapper is already running');
    }
    if (!this.evalServer.isRunning) {
      throw new Error('EvalServer must be started before starting HTTP wrapper');
    }

    this.apiServer.start();
    this.isRunning = true;
    return this;
  }

  /**
   * Stop the HTTP API server. A no-op when not running.
   */
  async stop() {
    if (!this.isRunning) {
      return;
    }
    this.apiServer.stop();
    this.isRunning = false;
  }

  /**
   * Get the HTTP server port.
   */
  getPort() {
    return this.config.port;
  }

  /**
   * Get the HTTP server host.
   */
  getHost() {
    return this.config.host;
  }

  /**
   * Get running status, including the base URL.
   */
  getStatus() {
    return {
      isRunning: this.isRunning,
      host: this.config.host,
      port: this.config.port,
      url: `http://${this.config.host}:${this.config.port}`
    };
  }
}
a/eval-server/nodejs/src/lib/judges/Judge.js b/eval-server/nodejs/src/lib/judges/Judge.js
new file mode 100644
index 0000000..83b0f53
--- /dev/null
+++ b/eval-server/nodejs/src/lib/judges/Judge.js
@@ -0,0 +1,80 @@
+// Copyright 2025 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+/**
+ * Judge - Abstract interface for evaluation judges
+ *
+ * A Judge is responsible for evaluating the quality of responses from LLM agents.
+ * Different implementations can provide different evaluation strategies.
+ */
+export class Judge {
+  /**
+   * Evaluate an agent response against a task
+   *
+   * @param {string} task - The original task or prompt
+   * @param {string} agentResponse - The response from the agent
+   * @param {Object} options - Additional options for evaluation
+   * @returns {Promise} Evaluation result with scores and feedback
+   */
+  async evaluate(task, agentResponse, options = {}) {
+    // Abstract method: always throws so concrete judges must override it.
+    throw new Error('Judge.evaluate() must be implemented by subclass');
+  }
+
+  /**
+   * Get the name of this judge implementation
+   * @returns {string} The judge name
+   */
+  getName() {
+    // Defaults to the subclass's class name.
+    return this.constructor.name;
+  }
+
+  /**
+   * Get configuration schema for this judge
+   * @returns {Object} Configuration schema
+   */
+  getConfigSchema() {
+    // Base judges have no configuration; subclasses may return a JSON schema.
+    return {};
+  }
+
+  /**
+   * Validate judge configuration
+   * @param {Object} config - Configuration to validate
+   * @returns {boolean} Whether configuration is valid
+   */
+  validateConfig(config) {
+    // Base implementation accepts any configuration.
+    return true;
+  }
+}
+
+/**
+ * Default evaluation result structure
+ * Scores default to null (meaning "not evaluated"); judges fill them in.
+ */
+export const DEFAULT_EVALUATION_RESULT = {
+  overall_score: null,
+  criteria_scores: {},
+  reasoning: '',
+  strengths: [],
+  weaknesses: [],
+  suggestions: [],
+  metadata: {
+    judge: 'unknown',
+    timestamp: null,
+    duration: null
+  }
+};
+
+/**
+ * Utility function to create a standardized evaluation result
+ */
+export function createEvaluationResult(overrides = {}) {
+  return {
+    
...DEFAULT_EVALUATION_RESULT,
+    ...overrides,
+    metadata: {
+      ...DEFAULT_EVALUATION_RESULT.metadata,
+      ...overrides.metadata,
+      timestamp: new Date().toISOString()
+    }
+  };
+}
\ No newline at end of file
diff --git a/eval-server/nodejs/src/lib/judges/LLMJudge.js b/eval-server/nodejs/src/lib/judges/LLMJudge.js
new file mode 100644
index 0000000..9e4c8a5
--- /dev/null
+++ b/eval-server/nodejs/src/lib/judges/LLMJudge.js
@@ -0,0 +1,344 @@
+// Copyright 2025 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+import OpenAI from 'openai';
+import { Judge, createEvaluationResult } from './Judge.js';
+import { CONFIG } from '../../config.js';
+import logger from '../../logger.js';
+
+/**
+ * LLMJudge - Uses an LLM (like GPT-4) to evaluate agent responses
+ *
+ * This is a refactored version of the original LLMEvaluator class,
+ * now implementing the Judge interface for better modularity.
+ */
+export class LLMJudge extends Judge {
+  constructor(config = {}) {
+    super();
+
+    // Defaults first, `...config` last: any key the caller passes (even a falsy
+    // value such as temperature 0) overrides the CONFIG fallbacks above it.
+    this.config = {
+      apiKey: config.apiKey || CONFIG.llm.apiKey,
+      model: config.model || CONFIG.llm.model,
+      temperature: config.temperature || CONFIG.llm.temperature,
+      maxTokens: config.maxTokens || 1000,
+      ...config
+    };
+
+    if (!this.config.apiKey) {
+      throw new Error('OpenAI API key is required for LLMJudge');
+    }
+
+    this.openai = new OpenAI({
+      apiKey: this.config.apiKey
+    });
+  }
+
+  /**
+   * Evaluate an agent response using an LLM
+   * On API or parsing failure this never throws; it returns a result with
+   * overall_score 0 and the error recorded in metadata.
+   */
+  async evaluate(task, agentResponse, options = {}) {
+    const startTime = Date.now();
+
+    try {
+      // Merge options with default config
+      const evalConfig = {
+        criteria: [],
+        model: this.config.model,
+        temperature: this.config.temperature,
+        ...options
+      };
+
+      const prompt = this.buildEvaluationPrompt(task, agentResponse, evalConfig);
+
+      const completion = await this.openai.chat.completions.create({
+        model: evalConfig.model,
+        messages: [
+          {
+            role: 'system',
+            content: 'You are an expert evaluator of AI agent responses. Provide objective, detailed evaluations in the requested JSON format.'
+          },
+          {
+            role: 'user',
+            content: prompt
+          }
+        ],
+        temperature: evalConfig.temperature,
+        max_tokens: this.config.maxTokens
+      });
+
+      const evaluation = completion.choices[0].message.content;
+      // NOTE(review): assumes the API reply always carries `usage` -- confirm,
+      // otherwise usage.total_tokens below will throw.
+      const usage = completion.usage;
+      const duration = Date.now() - startTime;
+
+      logger.info('LLMJudge: Evaluation completed', {
+        tokens_used: usage.total_tokens,
+        model: evalConfig.model,
+        duration
+      });
+
+      const result = this.parseEvaluation(evaluation);
+
+      // Add metadata
+      result.metadata = {
+        judge: this.getName(),
+        model: evalConfig.model,
+        timestamp: new Date().toISOString(),
+        duration,
+        tokens_used: usage.total_tokens,
+        criteria: evalConfig.criteria
+      };
+
+      return result;
+
+    } catch (error) {
+      logger.error('LLMJudge: Evaluation failed', { error: error.message });
+
+      return createEvaluationResult({
+        overall_score: 0,
+        reasoning: `Evaluation failed: ${error.message}`,
+        metadata: {
+          judge: this.getName(),
+          timestamp: new Date().toISOString(),
+          duration: Date.now() - startTime,
+          error: error.message
+        }
+      });
+    }
+  }
+
+  /**
+   * Build the evaluation prompt
+   * Uses config.criteria verbatim if supplied, otherwise a default 5-criteria rubric.
+   */
+  buildEvaluationPrompt(task, agentResponse, config) {
+    const { criteria } = config;
+
+    let prompt = `Please evaluate the following AI agent response to a given task.
+
+TASK:
+${task}
+
+AGENT RESPONSE:
+${agentResponse}
+
+Please evaluate the response on the following criteria and provide a JSON response:
+
+`;
+
+    // Use custom criteria if provided, otherwise use default criteria
+    if (criteria && criteria.length > 0) {
+      criteria.forEach((criterion, index) => {
+        prompt += `${index + 1}. **${criterion}**: Evaluate how well the response meets this criterion\n`;
+      });
+    } else {
+      prompt += `1. **Correctness**: Is the response factually accurate and correct?
+2. **Completeness**: Does the response fully address the task?
+3. **Clarity**: Is the response clear and well-structured?
+4. **Relevance**: Is the response relevant to the task?
+5. **Helpfulness**: How helpful is the response to the user?
+`;
+    }
+
+    // NOTE(review): the value placeholders in the JSON template below look
+    // stripped (e.g. `"overall_score": ,` has no placeholder) -- angle-bracket
+    // text was likely lost in transit; restore from the original source.
+    prompt += `
+Provide your evaluation in the following JSON format:
+{
+  "overall_score": ,
+  "criteria_scores": {`;
+
+    if (criteria && criteria.length > 0) {
+      criteria.forEach((criterion, index) => {
+        // Slugify the criterion into a JSON-safe snake_case key.
+        const key = criterion.toLowerCase().replace(/[^a-z0-9]/g, '_');
+        prompt += `\n    "${key}": `;
+        if (index < criteria.length - 1) prompt += ',';
+      });
+    } else {
+      prompt += `
+    "correctness": ,
+    "completeness": ,
+    "clarity": ,
+    "relevance": ,
+    "helpfulness": `;
+    }
+
+    prompt += `
+  },
+  "reasoning": "",
+  "strengths": [""],
+  "weaknesses": [""],
+  "suggestions": [""]
+}`;
+
+    return prompt;
+  }
+
+  /**
+   * Parse the LLM evaluation response
+   * Falls back to returning the raw text as `reasoning` when no JSON is found
+   * or parsing fails; never throws.
+   */
+  parseEvaluation(evaluationText) {
+    try {
+      // Try to extract JSON from the response
+      // Greedy match: spans from the first '{' to the last '}' in the text.
+      const jsonMatch = evaluationText.match(/\{[\s\S]*\}/);
+      if (jsonMatch) {
+        const parsedResult = JSON.parse(jsonMatch[0]);
+
+        // Validate and normalize the result
+        return createEvaluationResult({
+          overall_score: this.normalizeScore(parsedResult.overall_score),
+          criteria_scores: this.normalizeCriteriaScores(parsedResult.criteria_scores || {}),
+          reasoning: parsedResult.reasoning || '',
+          strengths: Array.isArray(parsedResult.strengths) ? parsedResult.strengths : [],
+          weaknesses: Array.isArray(parsedResult.weaknesses) ? parsedResult.weaknesses : [],
+          suggestions: Array.isArray(parsedResult.suggestions) ? parsedResult.suggestions : [],
+          raw_evaluation: evaluationText
+        });
+      }
+
+      // If no JSON found, return a structured response with the raw text
+      return createEvaluationResult({
+        overall_score: null,
+        criteria_scores: {},
+        reasoning: evaluationText,
+        strengths: [],
+        weaknesses: [],
+        suggestions: [],
+        raw_evaluation: evaluationText
+      });
+
+    } catch (error) {
+      logger.warn('LLMJudge: Failed to parse evaluation JSON', { error: error.message });
+
+      return createEvaluationResult({
+        overall_score: null,
+        criteria_scores: {},
+        reasoning: evaluationText,
+        strengths: [],
+        weaknesses: [],
+        suggestions: [],
+        raw_evaluation: evaluationText,
+        parse_error: error.message
+      });
+    }
+  }
+
+  /**
+   * Normalize score to be between 0 and 10
+   * @returns {number|null} Clamped score, or null for non-numeric input
+   */
+  normalizeScore(score) {
+    if (typeof score !== 'number' || isNaN(score)) {
+      return null;
+    }
+
+    // Clamp score between 0 and 10
+    return Math.max(0, Math.min(10, score));
+  }
+
+  /**
+   * Normalize criteria scores
+   * Applies normalizeScore to every entry; invalid scores become null.
+   */
+  normalizeCriteriaScores(scores) {
+    const normalized = {};
+
+    for (const [criterion, score] of Object.entries(scores)) {
+      normalized[criterion] = this.normalizeScore(score);
+    }
+
+    return normalized;
+  }
+
+  /**
+   * Get configuration schema
+   */
+  getConfigSchema() {
+    return {
+      type: 'object',
+      properties: {
+        apiKey: {
+          type: 'string',
+          description: 'OpenAI API key'
+        },
+        model: {
+          type: 'string',
+          description: 'OpenAI model to use for evaluation',
+          default: 'gpt-4'
+        },
+        temperature: {
+          type: 'number',
+          description: 'Temperature for LLM generation',
+          minimum: 0,
+          maximum: 2,
+          default: 0.1
+        },
+        maxTokens: {
+          type: 'number',
+          description: 'Maximum tokens for evaluation response',
+          minimum: 100,
+          maximum: 4000,
+          default: 1000
+        }
+      },
+      required: ['apiKey']
+    };
+  }
+
+  /**
+   * Validate configuration
+   * NOTE(review): requires config.apiKey even though the constructor accepts a
+   * CONFIG.llm.apiKey fallback -- confirm the asymmetry is intended.
+   * @throws {Error} On invalid apiKey, temperature, or maxTokens
+   */
+  validateConfig(config) {
+    if (!config.apiKey) {
+      throw new Error('LLMJudge requires an API key');
+    }
+
+    if (config.temperature !== undefined) {
+      if (typeof config.temperature !== 'number' || config.temperature < 0 || config.temperature > 2) {
+        throw new Error('Temperature must be a number between 0 and 2');
+      }
+    }
+
+    if (config.maxTokens !== undefined) {
+      if (typeof config.maxTokens !== 'number' || config.maxTokens < 100 || config.maxTokens > 4000) {
+        throw new Error('maxTokens must be a number between 100 and 4000');
+      }
+    }
+
+    return true;
+  }
+
+  /**
+   * Get available OpenAI models for evaluation
+   * Returns a hard-coded fallback list when the models API call fails.
+   */
+  async getAvailableModels() {
+    try {
+      const models = await this.openai.models.list();
+      return models.data
+        .filter(model => model.id.includes('gpt'))
+        .map(model => model.id)
+        .sort();
+    } catch (error) {
+      logger.error('LLMJudge: Failed to fetch available models', { error: error.message });
+      return ['gpt-4', 'gpt-3.5-turbo']; // Fallback list
+    }
+  }
+
+  /**
+   * Test the judge with a simple evaluation
+   */
+  async test() {
+    const testTask = 'Summarize the main points of artificial intelligence';
+    const testResponse = 'AI is a technology that enables machines to perform tasks that typically require human intelligence, such as learning, reasoning, and problem-solving.';
+
+    try {
+      const result = await this.evaluate(testTask, testResponse);
+      return {
+        success: true,
+        result,
+        message: 'LLMJudge test completed successfully'
+      };
+    } catch (error) {
+      return {
+        success: false,
+        error: error.message,
+        message: 'LLMJudge test failed'
+      };
+    }
+  }
+}
\ No newline at end of file
diff --git a/eval-server/nodejs/src/logger.js b/eval-server/nodejs/src/logger.js
new file mode 100644
index 0000000..c935eb9
--- /dev/null
+++ b/eval-server/nodejs/src/logger.js
@@ -0,0 +1,103 @@
+import winston from 'winston';
+import { existsSync, mkdirSync } from 'fs';
+import { CONFIG } from './config.js';
+
+// Ensure logs directory exists
+if (!existsSync(CONFIG.logging.dir)) {
+  mkdirSync(CONFIG.logging.dir, { recursive: true });
+}
+
+const logger = winston.createLogger({
+  level: CONFIG.logging.level,
+  format: 
winston.format.combine(
+    winston.format.timestamp(),
+    winston.format.errors({ stack: true }),
+    winston.format.json()
+  ),
+  defaultMeta: { service: 'bo-eval-server' },
+  transports: [
+    new winston.transports.File({
+      filename: `${CONFIG.logging.dir}/error.log`,
+      level: 'error'
+    }),
+    new winston.transports.File({
+      filename: `${CONFIG.logging.dir}/combined.log`
+    }),
+    new winston.transports.Console({
+      format: winston.format.combine(
+        winston.format.colorize(),
+        winston.format.simple()
+      )
+    })
+  ]
+});
+
+// Create dedicated evaluation logger once to avoid recreating on each call
+const evaluationLogger = winston.createLogger({
+  format: winston.format.json(),
+  transports: [
+    new winston.transports.File({
+      filename: `${CONFIG.logging.dir}/evaluations.jsonl`
+    })
+  ]
+});
+
+// Logs a completed evaluation three ways: a pretty console summary, the main
+// winston log, and the dedicated evaluations.jsonl file.
+// NOTE(review): the emoji literals in the console strings below are mojibake
+// (UTF-8 bytes decoded as a legacy codepage); left byte-identical here --
+// re-save this file as UTF-8 to repair them.
+export function logEvaluation(evaluationData) {
+  const logEntry = {
+    type: 'evaluation',
+    timestamp: new Date().toISOString(),
+    ...evaluationData
+  };
+
+  // Pretty print evaluation summary to console
+  console.log('\n' + '='.repeat(80));
+  console.log(`๐Ÿ“Š EVALUATION COMPLETED: ${evaluationData.name}`);
+  console.log('='.repeat(80));
+  console.log(`๐Ÿ†” ID: ${evaluationData.evaluationId}`);
+  console.log(`๐Ÿ”ง Tool: ${evaluationData.tool}`);
+  console.log(`โฑ๏ธ Duration: ${evaluationData.duration}ms`);
+  console.log(`๐Ÿ‘ค Client: ${evaluationData.clientId}`);
+
+  if (evaluationData.response?.output?.output) {
+    console.log(`\n๐Ÿ“ Output:\n${evaluationData.response.output.output}`);
+  }
+
+  if (evaluationData.validation?.result) {
+    const val = evaluationData.validation.result;
+    console.log(`\n๐Ÿ“‹ Validation:`);
+    console.log(`  โœ… Passed: ${evaluationData.validation.passed ? 'YES' : 'NO'}`);
+    console.log(`  ๐Ÿ“Š Overall Score: ${val.overall_score}/10`);
+    if (val.strengths?.length > 0) {
+      console.log(`  ๐Ÿ’ช Strengths: ${val.strengths.join(', ')}`);
+    }
+    if (val.weaknesses?.length > 0) {
+      console.log(`  โš ๏ธ Weaknesses: ${val.weaknesses.join(', ')}`);
+    }
+  }
+
+  console.log('='.repeat(80) + '\n');
+
+  // Also log structured data for file logs
+  logger.info('Evaluation completed', logEntry);
+
+  // Also save to dedicated evaluation log
+  evaluationLogger.info(logEntry);
+}
+
+// Structured log entry for a single JSON-RPC call (sent/success/error/timeout).
+export function logRpcCall(callData) {
+  logger.info('RPC call', {
+    type: 'rpc',
+    timestamp: new Date().toISOString(),
+    ...callData
+  });
+}
+
+// Structured log entry for a client connect/disconnect event.
+export function logConnection(connectionData) {
+  logger.info('Connection event', {
+    type: 'connection',
+    timestamp: new Date().toISOString(),
+    ...connectionData
+  });
+}
+
+export default logger;
\ No newline at end of file
diff --git a/eval-server/nodejs/src/rpc-client.js b/eval-server/nodejs/src/rpc-client.js
new file mode 100644
index 0000000..8de13ca
--- /dev/null
+++ b/eval-server/nodejs/src/rpc-client.js
@@ -0,0 +1,122 @@
+import { v4 as uuidv4 } from 'uuid';
+import { CONFIG } from './config.js';
+import { logRpcCall } from './logger.js';
+
+// Correlates outgoing JSON-RPC 2.0 requests with incoming responses by UUID id,
+// enforcing a per-request timeout.
+export class RpcClient {
+  constructor() {
+    // id -> { resolve, reject, timeoutId, method, params, timestamp }
+    this.pendingRequests = new Map();
+  }
+
+  /**
+   * Send a JSON-RPC request over the given WebSocket and await its response.
+   * Rejects on send failure or when no response arrives within `timeout` ms.
+   */
+  async callMethod(ws, method, params, timeout = CONFIG.rpc.timeout) {
+    return new Promise((resolve, reject) => {
+      const id = uuidv4();
+      const request = {
+        jsonrpc: '2.0',
+        method,
+        params,
+        id
+      };
+
+      // Set up timeout
+      const timeoutId = setTimeout(() => {
+        this.pendingRequests.delete(id);
+        logRpcCall({
+          id,
+          method,
+          params,
+          status: 'timeout',
+          error: 'Request timeout'
+        });
+        reject(new Error(`RPC call timeout after ${timeout}ms`));
+      }, timeout);
+
+      // Store the request for correlation
+      this.pendingRequests.set(id, {
+        resolve,
+        reject,
+        timeoutId,
+        method,
+        params,
+        timestamp: Date.now()
+      });
+
+      // Send the request
+      try {
+        ws.send(JSON.stringify(request));
+        logRpcCall({
+          id,
+          method,
+          params,
+          status: 'sent'
+        });
+      } catch (error) {
+        this.pendingRequests.delete(id);
+        clearTimeout(timeoutId);
+        logRpcCall({
+          id,
+          method,
+          params,
+          status: 'error',
+          error: error.message
+        });
+        reject(error);
+      }
+    });
+  }
+
+  /**
+   * Try to settle a pending request with an incoming message.
+   * Returns true if the message was a response we own; false lets the caller
+   * route non-RPC or unknown messages elsewhere (malformed JSON also returns false).
+   */
+  handleResponse(message) {
+    try {
+      const response = JSON.parse(message);
+
+      // Check if it's a valid JSON-RPC response
+      // Ids issued here are UUID strings, so the falsy-id check is safe.
+      if (response.jsonrpc !== '2.0' || !response.id) {
+        return false;
+      }
+
+      const pendingRequest = this.pendingRequests.get(response.id);
+      if (!pendingRequest) {
+        return false;
+      }
+
+      // Clean up
+      this.pendingRequests.delete(response.id);
+      clearTimeout(pendingRequest.timeoutId);
+
+      // Handle response
+      if (response.error) {
+        logRpcCall({
+          id: response.id,
+          method: pendingRequest.method,
+          params: pendingRequest.params,
+          status: 'error',
+          error: response.error,
+          duration: Date.now() - pendingRequest.timestamp
+        });
+        pendingRequest.reject(new Error(response.error.message || 'RPC error'));
+      } else {
+        logRpcCall({
+          id: response.id,
+          method: pendingRequest.method,
+          params: pendingRequest.params,
+          status: 'success',
+          result: response.result,
+          duration: Date.now() - pendingRequest.timestamp
+        });
+        pendingRequest.resolve(response.result);
+      }
+
+      return true;
+    } catch (error) {
+      return false;
+    }
+  }
+
+  cleanup() {
+    // Cleanup any pending requests
+    // Reject everything outstanding (e.g. on socket close) so callers don't hang.
+    for (const [id, request] of this.pendingRequests) {
+      clearTimeout(request.timeoutId);
+      request.reject(new Error('Connection closed'));
+    }
+    this.pendingRequests.clear();
+  }
+}
\ No newline at end of file
diff --git a/eval-server/nodejs/templates/default-client.yaml b/eval-server/nodejs/templates/default-client.yaml
new file mode 100644
index 0000000..6ada130
--- /dev/null
+++ b/eval-server/nodejs/templates/default-client.yaml
@@ -0,0 +1,56 @@
+# Default client configuration template
+# This file is used as a template when creating new clients
+
+client:
+  id: "{CLIENT_ID}"
+  name: 
"{CLIENT_NAME}" + secret_key: "{SECRET_KEY}" # Optional + description: "Auto-generated client configuration" + +settings: + max_concurrent_evaluations: 3 + default_timeout: 30000 + retry_policy: + max_retries: 2 + backoff_multiplier: 2 + initial_delay: 1000 + +evaluations: + # Example evaluation - disabled by default + - id: "example-schema-extraction" + name: "Example Schema Extraction" + description: "A sample evaluation for schema extraction" + enabled: false + + target: + url: "https://example.com" + wait_for: "networkidle" + wait_timeout: 5000 + + tool: "extract_data" + timeout: 30000 + + input: + schema: + type: "object" + properties: + title: + type: "string" + description: "Page title" + content: + type: "string" + description: "Main content" + + + validation: + type: "llm-judge" + llm_judge: + model: "gpt-4o-mini" + temperature: 0.3 + criteria: + - "Title should be extracted correctly" + - "Content should be meaningful and not empty" + + metadata: + tags: ["example", "schema-extraction"] + priority: "normal" \ No newline at end of file diff --git a/eval-server/python/README.md b/eval-server/python/README.md new file mode 100644 index 0000000..f167b48 --- /dev/null +++ b/eval-server/python/README.md @@ -0,0 +1,368 @@ +# bo-eval-server (Python) + +A minimal Python library for creating WebSocket-based evaluation servers for LLM agents. 
+
+## Features
+
+- 🔌 **WebSocket Server**: Real-time agent connections with asyncio
+- 🤖 **Bidirectional RPC**: JSON-RPC 2.0 for calling methods on connected agents
+- 📚 **Programmatic API**: Create and manage evaluations in Python code
+- 📊 **Evaluation Stack**: LIFO stack for managing evaluation queues
+- ⚡ **Concurrent Support**: Full async/await support for multiple agents
+- 🔍 **Enhanced Logging**: Structured logging with loguru
+- ✨ **Minimal Dependencies**: Only websockets and loguru required
+
+## Quick Start
+
+### Basic WebSocket Server
+
+```python
+import asyncio
+from bo_eval_server import EvalServer
+
+async def main():
+    server = EvalServer(
+        auth_key='hello',
+        host='127.0.0.1',
+        port=8080
+    )
+
+    @server.on_connect
+    async def handle_client(client):
+        print(f'Client connected: {client.id}')
+
+        response = await client.evaluate({
+            "id": "test_eval",
+            "name": "Capital of France",
+            "tool": "chat",
+            "input": {"message": "What is the capital of France?"}
+        })
+
+        print(f'Response: {response}')
+
+    await server.start()
+    print('Server running on ws://127.0.0.1:8080')
+
+    # Keep server running
+    await server.wait_closed()
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+### Using Evaluation Stack
+
+```python
+import asyncio
+from bo_eval_server import EvalServer, EvaluationStack
+
+async def main():
+    server = EvalServer(auth_key='secret', port=8080)
+    stack = EvaluationStack()
+
+    # Add evaluations to stack
+    stack.push({
+        "id": "eval_001",
+        "name": "Math Question",
+        "tool": "chat",
+        "input": {"message": "What is 2 + 2?"}
+    })
+
+    stack.push({
+        "id": "eval_002",
+        "name": "Science Question",
+        "tool": "chat",
+        "input": {"message": "What is the speed of light?"}
+    })
+
+    @server.on_connect
+    async def handle_client(client):
+        print(f'Client connected: {client.id}')
+
+        # Process evaluations from stack
+        while not stack.is_empty():
+            evaluation = stack.pop()
+            try:
+                result = await client.evaluate(evaluation)
+                
print(f'โœ… {evaluation["name"]}: {result["status"]}') + except Exception as e: + print(f'โŒ {evaluation["name"]}: {e}') + + await server.start() + await server.wait_closed() + +if __name__ == "__main__": + asyncio.run(main()) +``` + +## Installation + +### Using uv (Recommended) + +```bash +# Install uv package manager (if not already installed) +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Install dependencies and create virtual environment +uv sync + +# Run examples using the convenient runner +python run.py basic # Basic server example +python run.py stack # Evaluation stack example +python run.py prog # Programmatic evaluations example +python run.py all # Show all available examples + +# Or run examples directly with uv +uv run python examples/basic_server.py +uv run python examples/with_stack.py +uv run python examples/programmatic_evals.py +``` + +### Using pip (Alternative) + +```bash +# Install the package +pip install -e . + +# Or install with development dependencies +pip install -e ".[dev]" + +# Or install from requirements.txt +pip install -r requirements.txt +``` + +## Library Usage + +### EvalServer API + +```python +from bo_eval_server import EvalServer + +# Create server instance +server = EvalServer( + auth_key='your-secret-key', # Required: client authentication + host='127.0.0.1', # Optional: default 'localhost' + port=8080, # Optional: default 8080 +) + +# Register event handlers +@server.on_connect +async def handle_connect(client): + # Called when client connects and is ready + pass + +@server.on_disconnect +async def handle_disconnect(client_info): + # Called when client disconnects + pass + +# Server lifecycle +await server.start() # Start the server +await server.stop() # Stop the server +await server.wait_closed() # Wait for server to close + +# Server status +status = server.get_status() +print(f"Server running: {status['running']}") +``` + +### Client Proxy API + +```python +@server.on_connect +async def handle_client(client): 
+ # Client information + print(f'Client ID: {client.id}') + print(f'Tab ID: {client.tab_id}') + print(f'Base Client ID: {client.base_client_id}') + + # Execute evaluations + result = await client.evaluate({ + "id": "eval_001", + "name": "Test Evaluation", + "description": "Optional description", + "tool": "chat", + "input": {"message": "Your question here"}, + "timeout": 30.0, # Optional timeout in seconds + "metadata": {"tags": ["api", "test"]} + }) + + # Send custom messages + await client.send_message({ + "type": "custom", + "data": "Hello client!" + }) +``` + +### EvaluationStack API + +```python +from bo_eval_server import EvaluationStack + +stack = EvaluationStack() + +# Add evaluations (LIFO - Last In, First Out) +stack.push({ + "id": "eval_001", + "name": "Test", + "tool": "chat", + "input": {"message": "Hello"} +}) + +# Remove and get evaluation +evaluation = stack.pop() # Returns dict or None if empty + +# Stack operations +size = stack.size() # Get number of evaluations +is_empty = stack.is_empty() # Check if empty +top = stack.peek() # View top without removing +stack.clear() # Remove all evaluations +all_evals = stack.to_array() # Get copy as list +``` + +## Agent Protocol + +Your agent needs to implement the WebSocket protocol: + +### 1. Connect to WebSocket +```python +import websockets +import json + +ws = await websockets.connect('ws://localhost:8080') +``` + +### 2. Receive Authentication Challenge +The server sends an authentication challenge with the secret key: +```python +challenge = json.loads(await ws.recv()) +# Expected: {"type": "auth_challenge", "secretKey": "hello", "connectionId": "uuid"} +``` + +### 3. Send Registration Response +Client validates the secret key and responds: +```python +await ws.send(json.dumps({ + "type": "register", + "clientId": "your-client-id", + "acceptAuth": True, # True if secret key is acceptable + "connectionId": challenge["connectionId"], + "capabilities": ["chat", "action"] +})) +``` + +### 4. 
Receive Registration Confirmation +```python +confirmation = json.loads(await ws.recv()) +# Expected: {"type": "registered", "clientId": "your-client-id", "serverTime": 123456} +``` + +### 5. Send Ready Signal +```python +await ws.send(json.dumps({"type": "ready"})) +``` + +### 6. Handle RPC Calls +```python +async for message in ws: + data = json.loads(message) + + if data.get("jsonrpc") == "2.0" and data.get("method") == "evaluate": + # Handle evaluation request + result = await handle_evaluation(data["params"]) + + # Send response + await ws.send(json.dumps({ + "jsonrpc": "2.0", + "id": data["id"], + "result": result + })) +``` + +## Architecture + +``` +src/bo_eval_server/ +โ”œโ”€โ”€ __init__.py # Package exports +โ”œโ”€โ”€ eval_server.py # Main EvalServer class +โ”œโ”€โ”€ evaluation_stack.py # EvaluationStack implementation +โ”œโ”€โ”€ client_manager.py # Client connection management +โ”œโ”€โ”€ rpc_client.py # JSON-RPC client implementation +โ”œโ”€โ”€ config.py # Configuration management +โ””โ”€โ”€ logger.py # Enhanced logging setup +``` + +## Design Principles + +- **Async-First**: Built on asyncio for high concurrency +- **Minimal Dependencies**: Only essential packages required +- **Type Hints**: Full typing support for better development experience +- **Event-Driven**: React to client connections with decorators +- **Programmatic**: Full control through Python code +- **Clean API**: Simple, Pythonic interface + +## Examples + +See the `examples/` directory for complete working examples: + +- `basic_server.py` - Simple WebSocket server setup +- `with_stack.py` - Using evaluation stack for queuing +- `programmatic_evals.py` - Creating evaluations in code + +## Evaluation Scripts + +The `evals/` directory contains ready-to-use evaluation scripts for various benchmarks: + +- `browsecomp_eval_server.py` - Browsecomp benchmark server (1,266 web browsing questions) + - Run with: `./evals/run_browsecomp_eval_server.sh` + - See `evals/README.md` for detailed usage + 
+## Development + +### Using uv + +```bash +# Install with development dependencies +uv sync --dev + +# Run tests +uv run pytest + +# Format code +uv run black src/ examples/ + +# Type checking +uv run mypy src/ + +# Run all development commands +uv run pytest && uv run black src/ examples/ && uv run mypy src/ +``` + +### Using pip + +```bash +# Install in development mode +pip install -e ".[dev]" + +# Run tests +pytest + +# Format code +black src/ examples/ + +# Type checking +mypy src/ +``` + +## Environment Variables + +```bash +# Optional configuration +BO_EVAL_SERVER_HOST=localhost +BO_EVAL_SERVER_PORT=8080 +BO_EVAL_SERVER_LOG_LEVEL=INFO +``` + +--- + +This Python implementation provides the core WebSocket evaluation server functionality with a clean, async API for programmatic evaluation management. \ No newline at end of file diff --git a/eval-server/python/UV_COMMANDS.md b/eval-server/python/UV_COMMANDS.md new file mode 100644 index 0000000..ea79fcb --- /dev/null +++ b/eval-server/python/UV_COMMANDS.md @@ -0,0 +1,188 @@ +# UV Commands Reference + +Quick reference for using uv with bo-eval-server Python implementation. 
+ +## Installation & Setup + +```bash +# Install uv (if not already installed) +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Install project dependencies +uv sync + +# Install with development dependencies +uv sync --dev +``` + +## Running Examples + +### Using the convenience runner (Recommended) + +```bash +# Basic WebSocket server +python run.py basic + +# Evaluation stack example +python run.py stack + +# Programmatic evaluations with analytics +python run.py prog + +# Show all available examples +python run.py all +``` + +### Direct uv execution + +```bash +# Run examples directly +uv run python examples/basic_server.py +uv run python examples/with_stack.py +uv run python examples/programmatic_evals.py + +# Run with custom arguments or environment variables +uv run --env BO_EVAL_SERVER_PORT=8081 python examples/basic_server.py +``` + +## Development Commands + +```bash +# Run tests +uv run pytest + +# Run tests with coverage +uv run pytest --cov=src/bo_eval_server + +# Format code +uv run black . +uv run black src/ examples/ + +# Type checking +uv run mypy src/ + +# Run all checks +uv run pytest && uv run black . 
&& uv run mypy src/ +``` + +## Package Management + +```bash +# Add new dependencies +uv add requests +uv add --dev pytest-cov + +# Remove dependencies +uv remove requests + +# Update dependencies +uv sync --upgrade + +# Show installed packages +uv tree + +# Show project info +uv show +``` + +## Virtual Environment + +```bash +# Activate virtual environment +source .venv/bin/activate # Unix/macOS +# or +.venv\Scripts\activate # Windows + +# Check Python version in venv +uv run python --version + +# Run any command in the virtual environment +uv run +``` + +## Project Scripts + +The project includes entry point scripts defined in `pyproject.toml`: + +```bash +# After installation, these commands become available: +bo-eval-basic # Run basic server example +bo-eval-stack # Run evaluation stack example +bo-eval-programmatic # Run programmatic evaluations example +``` + +## Useful UV Options + +```bash +# Run with specific Python version +uv run --python 3.11 python examples/basic_server.py + +# Run with environment variables +uv run --env DEBUG=1 python examples/basic_server.py + +# Run in isolated environment (no local packages) +uv run --isolated python examples/basic_server.py + +# Show verbose output +uv sync --verbose + +# Force reinstall +uv sync --reinstall +``` + +## Integration with IDEs + +For VS Code and other IDEs, point to the uv-created virtual environment: + +```bash +# Show virtual environment path +echo $PWD/.venv/bin/python + +# Or use uv to find it +uv run which python +``` + +Then configure your IDE to use this Python interpreter for the project. 
+ +## Common Workflows + +### Quick Start Development + +```bash +git clone +cd eval-server/python +uv sync --dev +python run.py basic +``` + +### Running Tests in CI + +```bash +uv sync --dev --frozen +uv run pytest --cov=src/bo_eval_server --cov-report=xml +``` + +### Building and Publishing + +```bash +uv build +uv publish # If publishing to PyPI +``` + +## Troubleshooting + +```bash +# Clear uv cache +uv cache clean + +# Reinstall everything +rm -rf .venv uv.lock +uv sync + +# Check uv version +uv --version + +# Get help +uv --help +uv run --help +``` \ No newline at end of file diff --git a/eval-server/python/evals/README.md b/eval-server/python/evals/README.md new file mode 100644 index 0000000..6d3b082 --- /dev/null +++ b/eval-server/python/evals/README.md @@ -0,0 +1,195 @@ +# Python Evaluation Scripts + +This directory contains evaluation scripts for running various benchmark datasets using the Python eval-server. + +## Available Scripts + +### Browsecomp Evaluation Server + +**Script**: `browsecomp_eval_server.py` +**Wrapper**: `run_browsecomp_eval_server.sh` + +The browsecomp eval server loads questions from the [Browsecomp benchmark](https://github.com/openai/simple-evals) and distributes them to connected BrowserOperator clients via WebSocket connections. 
+ +#### Features + +- Loads and decrypts 1,266 browsecomp questions automatically +- Distributes exactly one question per client connection +- Stack-based LIFO distribution +- **Automatic scoring**: Compares responses against true answers +- **Structured response parsing**: Handles BrowserOperator's message format +- **Comprehensive logging**: Structured logs saved to timestamped files +- Real-time progress tracking with accuracy metrics +- Confidence score extraction and analysis +- Results saved to JSON file for later analysis +- Configurable timeout (default: 60 minutes) +- Configurable server settings + +#### Usage + +```bash +# Use the wrapper script for proper dependencies +./run_browsecomp_eval_server.sh --help + +# List available questions +./run_browsecomp_eval_server.sh --list --list-limit 10 + +# Start server with first 5 questions +./run_browsecomp_eval_server.sh --limit 5 + +# Start server with specific questions +./run_browsecomp_eval_server.sh --questions 1 5 10 25 + +# Start server with a range of questions (questions 10-15) +./run_browsecomp_eval_server.sh --start 10 --end 15 + +# Start server from question 100 to the end +./run_browsecomp_eval_server.sh --start 100 + +# Start server with questions 1-50 +./run_browsecomp_eval_server.sh --end 50 + +# Start server with all 1,266 questions +./run_browsecomp_eval_server.sh + +# Custom configuration +./run_browsecomp_eval_server.sh --limit 20 --port 8081 --auth-key my-key + +# Save results to JSON file +./run_browsecomp_eval_server.sh --limit 10 --save-results +``` + +#### How It Works + +1. **Load Questions**: The server loads browsecomp questions from the dataset +2. **Stack Distribution**: Questions are placed in a LIFO stack +3. **Client Connection**: When a BrowserOperator connects, it receives one question +4. **Processing**: The client processes the question and returns results +5. **Automatic Scoring**: Server compares responses against true answers +6. 
**Tracking**: Server tracks completion, accuracy, and confidence statistics +7. **Results**: Optionally saves detailed results to JSON file + +#### Example Workflow + +```bash +# Terminal 1: Start the eval server +cd /path/to/eval-server/python/evals +./run_browsecomp_eval_server.sh --limit 10 --save-results + +# Terminal 2+: Connect BrowserOperator clients +# Each client will automatically receive and process one question +``` + +#### Scoring Output + +When evaluations complete, you'll see automatic scoring results: + +``` +โœ… Evaluation completed! +๐Ÿ“Š Response structure: 12 messages, 3 tool calls, gpt-4 model, 45230ms + +๐ŸŽฏ Scoring Results: + - True Answer: 1988-96 + - Extracted Answer: 1988-96 + - Correct: โœ… YES + - Confidence: 85% + +๐Ÿ“Š Current Statistics: + โœ… Completed: 5/10 + โŒ Failed: 0/10 + ๐Ÿ“š Remaining: 5/10 + +๐ŸŽฏ Scoring Statistics: + ๐Ÿ“Š Accuracy: 80.0% (4/5 correct) + ๐Ÿ’ก Average Confidence: 78.5% +``` + +#### Results JSON Format + +When using `--save-results`, evaluations are saved to `browsecomp_eval_results_[timestamp].json`: + +```json +{ + "timestamp": "20240115_143022", + "total_questions": 10, + "completed": 10, + "failed": 0, + "accuracy": 80.0, + "average_confidence": 78.5, + "evaluations": [ + { + "client_id": "abc123...", + "question_id": 1, + "result": "Explanation: ... 
Exact Answer: 1988-96 Confidence Score: 85%", + "scoring": { + "is_correct": true, + "true_answer": "1988-96", + "extracted_answer": "1988-96", + "confidence": 85 + } + } + ] +} +``` + +#### Logging + +The server creates comprehensive logs in the `./logs/` directory: + +- **Console Output**: Real-time progress with emojis and summaries +- **Structured Logs**: Timestamped log file `browsecomp_eval_server_YYYYMMDD_HHMMSS.log` + +**Structured Log Events**: +``` +EVENT: {"timestamp": "2024-01-15T14:30:22", "event_type": "client_connected", "client_id": "abc123", "stack_remaining": 10} +EVENT: {"timestamp": "2024-01-15T14:30:25", "event_type": "evaluation_assigned", "evaluation_id": "browsecomp_q1", "question_id": 1} +EVENT: {"timestamp": "2024-01-15T14:32:10", "event_type": "evaluation_completed", "is_correct": true, "confidence": 85, "model_used": "gpt-4"} +EVENT: {"timestamp": "2024-01-15T14:35:00", "event_type": "session_completed", "accuracy": 80.0, "total_questions": 10} +``` + +**Log Files Location**: +- `./logs/browsecomp_eval_server_YYYYMMDD_HHMMSS.log` - Main server log +- `./logs/` - Directory also used by eval-server's internal logging + +## Dependencies + +The evaluation scripts require additional dependencies beyond the base eval-server: +- `pandas` - For dataset loading and manipulation +- `requests` - For downloading datasets + +These are automatically installed when you run `uv sync` in the eval-server/python directory. + +## Adding New Evaluation Scripts + +To add a new evaluation script: + +1. Create your script in this directory +2. Import the eval-server modules: + ```python + import sys + from pathlib import Path + sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + from bo_eval_server import EvalServer, EvaluationStack + ``` + +3. Create a wrapper script for easy execution: + ```bash + #!/bin/bash + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + cd "$SCRIPT_DIR/.." + uv run python evals/your_script.py "$@" + ``` + +4. 
Make the wrapper executable: `chmod +x your_wrapper.sh` + +## Dataset Files + +- `browsecomp_dataset.py` - Dataset loader for browsecomp questions with automatic decryption support +- `browsecomp_scorer.py` - Scoring logic that extracts answers and compares against ground truth + +## Notes + +- Always use the wrapper scripts (`.sh` files) to ensure proper dependencies are loaded +- The eval server runs on WebSocket protocol (ws://localhost:8080 by default) +- Each connected client receives exactly one evaluation from the stack +- Progress and statistics are shown in real-time during execution \ No newline at end of file diff --git a/eval-server/python/evals/browsecomp_dataset.py b/eval-server/python/evals/browsecomp_dataset.py new file mode 100644 index 0000000..387f713 --- /dev/null +++ b/eval-server/python/evals/browsecomp_dataset.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python3 +""" +Browsecomp Dataset Loader +Handles downloading and decrypting the Browsecomp benchmark dataset +""" + +import base64 +import hashlib +import pandas as pd +from typing import List, Dict, Optional + + +DATASET_URL = "https://openaipublic.blob.core.windows.net/simple-evals/browse_comp_test_set.csv" + + +def derive_key(password: str, length: int) -> bytes: + """Derive a fixed-length key from the password using SHA256.""" + hasher = hashlib.sha256() + hasher.update(password.encode()) + key = hasher.digest() + return key * (length // len(key)) + key[: length % len(key)] + + +def decrypt(ciphertext_b64: str, password: str) -> str: + """Decrypt base64-encoded ciphertext with XOR.""" + encrypted = base64.b64decode(ciphertext_b64) + key = derive_key(password, len(encrypted)) + decrypted = bytes(a ^ b for a, b in zip(encrypted, key)) + return decrypted.decode() + + +class BrowsecompDataset: + def __init__(self, password: Optional[str] = None): + """ + Initialize the Browsecomp dataset loader. + + Args: + password: Decryption password for the dataset. 
If None, will try + to read from BROWSECOMP_PASSWORD environment variable. + """ + self.password = password + if not self.password: + import os + self.password = os.environ.get('BROWSECOMP_PASSWORD', '') + + self.dataset = None + + def load_dataset(self) -> pd.DataFrame: + """Load and decrypt the Browsecomp dataset.""" + print("Loading Browsecomp dataset...") + + try: + # Download the dataset + df = pd.read_csv(DATASET_URL) + print(f"Downloaded {len(df)} encrypted questions") + + # Add index column for reference + df = df.reset_index(drop=True) + df['question_id'] = df.index + 1 + + # Check for canary field in dataset (automatic password) + canary_password = None + if 'canary' in df.columns and len(df) > 0: + canary_password = df.iloc[0]['canary'] + print(f"Found canary password in dataset") + + # Use provided password or canary from dataset + decryption_password = self.password or canary_password + + if decryption_password: + print("Decrypting questions...") + decrypted_rows = [] + + for idx, row in df.iterrows(): + try: + # Use the canary from the row or the provided password + row_canary = row.get('canary', decryption_password) + + # Decrypt the problem and answer columns + row_dict = row.to_dict() + + if 'problem' in row and pd.notna(row['problem']): + row_dict['problem_decrypted'] = decrypt(row['problem'], row_canary) + row_dict['problem_encrypted'] = row['problem'] + else: + row_dict['problem_decrypted'] = "[No problem field]" + + if 'answer' in row and pd.notna(row['answer']): + row_dict['answer_decrypted'] = decrypt(row['answer'], row_canary) + row_dict['answer_encrypted'] = row['answer'] + else: + row_dict['answer_decrypted'] = "" + + decrypted_rows.append(row_dict) + + except Exception as e: + print(f"Error decrypting row {idx}: {e}") + row_dict = row.to_dict() + row_dict['problem_decrypted'] = f"[Decryption failed: {str(e)}]" + row_dict['answer_decrypted'] = "" + decrypted_rows.append(row_dict) + + df = pd.DataFrame(decrypted_rows) + print(f"Successfully 
decrypted {len(df)} questions") + else: + print("Warning: No password provided and no canary found, questions remain encrypted") + df['problem_decrypted'] = df.get('problem', '') + df['answer_decrypted'] = df.get('answer', '') + + # Normalize column names for consistency + df = self._normalize_columns(df) + + # Add difficulty level (all Browsecomp questions are considered level 1) + df['task'] = 1 + + self.dataset = df + return df + + except Exception as e: + print(f"Error loading dataset: {e}") + raise + + def _normalize_columns(self, df: pd.DataFrame) -> pd.DataFrame: + """Normalize column names to match expected format.""" + # Map Browsecomp columns to standard format + column_mapping = { + 'problem_decrypted': 'question', + 'problem': 'question_encrypted', + 'answer_decrypted': 'true_answer', + 'answer': 'true_answer_encrypted', + 'question_id': 'question_id' + } + + # Apply renaming + for old_col, new_col in column_mapping.items(): + if old_col in df.columns: + df = df.rename(columns={old_col: new_col}) + + # Ensure required columns exist + if 'question' not in df.columns: + if 'problem_decrypted' in df.columns: + df['question'] = df['problem_decrypted'] + else: + raise ValueError("No question column found in dataset") + + if 'true_answer' not in df.columns: + if 'answer_decrypted' in df.columns: + df['true_answer'] = df['answer_decrypted'] + elif 'answer' in df.columns: + df['true_answer'] = df['answer'] + else: + print("Warning: No answer column found, setting empty answers") + df['true_answer'] = '' + + return df + + def get_questions(self, + indices: Optional[List[int]] = None, + limit: Optional[int] = None) -> pd.DataFrame: + """ + Get specific questions from the dataset. 
+ + Args: + indices: List of question numbers (1-based) to retrieve + limit: Maximum number of questions to return + + Returns: + DataFrame with selected questions + """ + if self.dataset is None: + self.load_dataset() + + df = self.dataset.copy() + + # Filter by specific indices if provided + if indices: + # Convert to 0-based indexing + zero_based_indices = [i - 1 for i in indices if i > 0] + valid_indices = [i for i in zero_based_indices if i < len(df)] + + if not valid_indices: + print(f"No valid question indices found. Available range: 1-{len(df)}") + return pd.DataFrame() + + df = df.iloc[valid_indices] + + # Apply limit if specified + if limit and not indices: + df = df.head(limit) + + return df + + def list_questions(self, limit: int = 20) -> None: + """Display available questions.""" + if self.dataset is None: + self.load_dataset() + + print(f"\nAvailable Browsecomp questions (showing first {limit}):") + print("=" * 80) + + for idx in range(min(limit, len(self.dataset))): + row = self.dataset.iloc[idx] + question = row.get('question', row.get('problem_decrypted', '[Encrypted]')) + + # Truncate long questions + if isinstance(question, str): + question_preview = question[:60] + "..." if len(question) > 60 else question + else: + question_preview = "[No question text]" + + print(f"#{idx + 1:3d} {question_preview}") + + if len(self.dataset) > limit: + print(f"\n... and {len(self.dataset) - limit} more questions") + + print(f"\nTotal: {len(self.dataset)} questions") + + # Check if questions are actually decrypted + if len(self.dataset) > 0: + first_question = self.dataset.iloc[0].get('question', '') + if not first_question or first_question.startswith('['): + print("โš ๏ธ Questions are encrypted. 
Set BROWSECOMP_PASSWORD to decrypt.") + else: + print("โœ“ Questions are decrypted and ready to use") + + +def test_dataset_loading(): + """Test the dataset loading functionality.""" + dataset = BrowsecompDataset() + + try: + df = dataset.load_dataset() + print(f"\nโœ“ Loaded {len(df)} questions") + print(f"Columns: {list(df.columns)}") + + # Show first question + if len(df) > 0: + first = df.iloc[0] + print(f"\nFirst question (truncated):") + question_text = str(first.get('question', '')) + print(f" Question: {question_text[:100]}...") + print(f" Answer: {first.get('true_answer', 'N/A')}") + + except Exception as e: + print(f"โœ— Error: {e}") + return False + + return True + + +if __name__ == "__main__": + test_dataset_loading() \ No newline at end of file diff --git a/eval-server/python/evals/browsecomp_eval_server.py b/eval-server/python/evals/browsecomp_eval_server.py new file mode 100755 index 0000000..753e7cf --- /dev/null +++ b/eval-server/python/evals/browsecomp_eval_server.py @@ -0,0 +1,836 @@ +#!/usr/bin/env python3 +""" +Browsecomp Evaluation Server + +Command-line controlled eval processing server that loads browsecomp questions +into a stack and distributes them one per client connection. +""" + +import argparse +import asyncio +import json +import logging +import sys +import time +from datetime import datetime +from pathlib import Path +from typing import List, Dict, Any, Optional + +# Add eval-server src to path +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +# Add current directory (evals) to path for browsecomp_dataset import +sys.path.insert(0, str(Path(__file__).parent)) + +from bo_eval_server import EvalServer, EvaluationStack +from browsecomp_dataset import BrowsecompDataset +from browsecomp_scorer import question_scorer, extract_answer, extract_confidence + + +def log_evaluation_event(logger: logging.Logger, event_type: str, data: Dict[str, Any]) -> None: + """ + Log a structured evaluation event. 
+ + Args: + logger: Logger instance + event_type: Type of event (client_connect, evaluation_start, evaluation_complete, etc.) + data: Event data to log + """ + log_entry = { + "timestamp": datetime.now().isoformat(), + "event_type": event_type, + **data + } + logger.info(f"EVENT: {json.dumps(log_entry)}") + + +def setup_logging(log_dir: str = "./logs") -> logging.Logger: + """ + Set up logging to both console and file. + + Args: + log_dir: Directory to save log files + + Returns: + Configured logger + """ + # Ensure logs directory exists + Path(log_dir).mkdir(exist_ok=True) + + # Create timestamp for log file + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + log_file = Path(log_dir) / f"browsecomp_eval_server_{timestamp}.log" + + # Create logger + logger = logging.getLogger('browsecomp_eval_server') + logger.setLevel(logging.INFO) + + # Clear any existing handlers + logger.handlers.clear() + + # Create formatter + formatter = logging.Formatter( + '%(asctime)s | %(levelname)-8s | %(name)s | %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + + # Console handler (for immediate feedback) + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(logging.INFO) + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) + + # File handler (for persistent logging) + file_handler = logging.FileHandler(log_file) + file_handler.setLevel(logging.INFO) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + logger.info(f"Logging initialized - saving to {log_file}") + return logger + + +def extract_response_text(result: Any) -> str: + """ + Extract the actual response text from BrowserOperator's structured response format. 
+ + Args: + result: The response from BrowserOperator (could be string, dict, or structured format) + + Returns: + The text content that should be scored + """ + # Handle partial results with errors first + if isinstance(result, dict) and result.get('partial') and result.get('error'): + # This is our error structure, fallback to string representation + return str(result) + + # Handle structured BrowserOperator response + if isinstance(result, dict): + # Look for messages array (main response structure) + if 'messages' in result and isinstance(result['messages'], list): + response_parts = [] + + for message in result['messages']: + if isinstance(message, dict): + # Model responses with answers + if message.get('entity') == 'model' and message.get('answer'): + response_parts.append(message['answer']) + # Tool results + elif message.get('entity') == 'tool_result' and message.get('resultText'): + response_parts.append(message['resultText']) + # User messages + elif message.get('entity') == 'user' and message.get('text'): + response_parts.append(message['text']) + + if response_parts: + return '\n'.join(response_parts) + + # Fallback: look for common response fields + for field in ['answer', 'response', 'result', 'text', 'content']: + if field in result and result[field]: + return str(result[field]) + + # Fallback to string representation + return str(result) + + +def convert_question_to_evaluation(question_row: Dict[str, Any], question_id: int) -> Dict[str, Any]: + """ + Convert a browsecomp question to the evaluation format expected by eval-server. 
+ + Args: + question_row: Row from the browsecomp dataset DataFrame + question_id: Question ID number (1-based) + + Returns: + Evaluation object compatible with eval-server + """ + question_text = question_row.get('question', question_row.get('problem_decrypted', '')) + true_answer = question_row.get('true_answer', question_row.get('answer_decrypted', '')) + + return { + "id": f"browsecomp_q{question_id}", + "name": f"Browsecomp Question {question_id}", + "description": f"Web browsing evaluation question from browsecomp dataset", + "tool": "chat", + "input": { + "message": f"{question_text}\n\nPlease provide your response in the following format:\n\nExplanation: [Step-by-step reasoning and information gathering]\n\nExact Answer: [The precise answer to the question]\n\nConfidence Score: [Confidence as a percentage, e.g., 85%]" + }, + # Store original data for later reference/scoring + "metadata": { + "question_id": question_id, + "true_answer": true_answer, + "original_question": question_text, + "dataset": "browsecomp" + } + } + + +def load_browsecomp_evaluations( + limit: Optional[int] = None, + questions: Optional[List[int]] = None, + start: Optional[int] = None, + end: Optional[int] = None, + password: Optional[str] = None +) -> List[Dict[str, Any]]: + """ + Load browsecomp questions and convert them to evaluation format. 
+ + Args: + limit: Maximum number of questions to load + questions: Specific question numbers to load (1-based) + start: Start question number for range selection (1-based, inclusive) + end: End question number for range selection (1-based, inclusive) + password: Decryption password (optional, auto-detected from dataset) + + Returns: + List of evaluation objects + """ + print("๐Ÿ“š Loading Browsecomp dataset...") + + # Load dataset + dataset = BrowsecompDataset(password=password) + + try: + df = dataset.load_dataset() + print(f"โœ… Loaded {len(df)} questions from dataset") + except Exception as e: + print(f"โŒ Failed to load dataset: {e}") + return [] + + # Get specific questions, range, or apply limit + if questions: + print(f"๐Ÿ“‹ Filtering to specific questions: {questions}") + df_filtered = dataset.get_questions(indices=questions) + elif start is not None or end is not None: + # Handle range selection + if start is not None and end is not None: + if start > end: + print(f"โŒ Invalid range: start ({start}) cannot be greater than end ({end})") + return [] + if start < 1: + print(f"โŒ Invalid start: question numbers are 1-based, got {start}") + return [] + if end > len(df): + print(f"โš ๏ธ End question {end} exceeds dataset size ({len(df)}), using {len(df)} instead") + end = len(df) + + print(f"๐Ÿ“‹ Loading questions {start} to {end} (range of {end - start + 1} questions)") + # Convert to 0-based indexing for pandas + range_questions = list(range(start, end + 1)) + df_filtered = dataset.get_questions(indices=range_questions) + elif start is not None: + # Only start specified, go to end of dataset + if start < 1: + print(f"โŒ Invalid start: question numbers are 1-based, got {start}") + return [] + if start > len(df): + print(f"โŒ Start question {start} exceeds dataset size ({len(df)})") + return [] + + print(f"๐Ÿ“‹ Loading questions from {start} to end ({len(df) - start + 1} questions)") + range_questions = list(range(start, len(df) + 1)) + df_filtered = 
dataset.get_questions(indices=range_questions) + else: + # Only end specified, start from beginning + if end < 1: + print(f"โŒ Invalid end: question numbers are 1-based, got {end}") + return [] + if end > len(df): + print(f"โš ๏ธ End question {end} exceeds dataset size ({len(df)}), using {len(df)} instead") + end = len(df) + + print(f"๐Ÿ“‹ Loading questions 1 to {end} ({end} questions)") + range_questions = list(range(1, end + 1)) + df_filtered = dataset.get_questions(indices=range_questions) + elif limit: + print(f"๐Ÿ“‹ Limiting to first {limit} questions") + df_filtered = dataset.get_questions(limit=limit) + else: + print(f"๐Ÿ“‹ Loading all {len(df)} questions") + df_filtered = df + + if df_filtered.empty: + print("โŒ No questions found with the specified criteria") + return [] + + print(f"๐Ÿ”„ Converting {len(df_filtered)} questions to evaluation format...") + + # Convert to evaluation format + evaluations = [] + for idx, row in df_filtered.iterrows(): + question_id = row.get('question_id', idx + 1) + evaluation = convert_question_to_evaluation(row.to_dict(), question_id) + evaluations.append(evaluation) + + # Show preview of first few questions + if len(evaluations) <= 3: + question_preview = evaluation['input']['message'][:80] + "..." + print(f" โ€ข Q{question_id}: {question_preview}") + + if len(evaluations) > 3: + print(f" ... 
and {len(evaluations) - 3} more questions") + + print(f"โœ… Created {len(evaluations)} evaluation objects") + return evaluations + + +def main(): + """Main function for the browsecomp evaluation server.""" + return asyncio.run(async_main()) + +async def async_main(): + """Async main function for the browsecomp evaluation server.""" + parser = argparse.ArgumentParser(description="Browsecomp Evaluation Server") + parser.add_argument( + "--limit", + type=int, + help="Maximum number of questions to load (default: all 1,266 questions)" + ) + parser.add_argument( + "--questions", + type=int, + nargs="+", + help="Specific question numbers to load (1-based, e.g. --questions 1 5 10)" + ) + parser.add_argument( + "--start", + type=int, + help="Start question number for range selection (1-based, inclusive)" + ) + parser.add_argument( + "--end", + type=int, + help="End question number for range selection (1-based, inclusive)" + ) + parser.add_argument( + "--port", + type=int, + default=8080, + help="Server port (default: 8080)" + ) + parser.add_argument( + "--host", + type=str, + default="127.0.0.1", + help="Server host (default: 127.0.0.1)" + ) + parser.add_argument( + "--auth-key", + type=str, + default="browsecomp-eval", + help="Authentication key (default: browsecomp-eval)" + ) + parser.add_argument( + "--password", + type=str, + help="Dataset decryption password (optional, auto-detected from dataset)" + ) + parser.add_argument( + "--list", + action="store_true", + help="List available questions without starting server" + ) + parser.add_argument( + "--list-limit", + type=int, + default=20, + help="Number of questions to show when listing (default: 20)" + ) + parser.add_argument( + "--save-results", + action="store_true", + help="Save evaluation results to JSON file on completion" + ) + parser.add_argument( + "--timeout", + type=float, + default=3600.0, + help="Timeout for each evaluation in seconds (default: 3600s/60min)" + ) + + args = parser.parse_args() + + # Setup 
logging + logger = setup_logging("./logs") + + # Handle list mode + if args.list: + logger.info("๐Ÿ“‹ Listing available browsecomp questions...") + dataset = BrowsecompDataset(password=args.password) + + # Apply filtering for list mode if range or specific questions are specified + if args.questions or args.start is not None or args.end is not None: + # Load the full dataset first + df = dataset.load_dataset() + + # Apply the same filtering logic as the main function + if args.questions: + print(f"๐Ÿ“‹ Showing specific questions: {args.questions}") + df_filtered = dataset.get_questions(indices=args.questions) + elif args.start is not None or args.end is not None: + # Handle range selection (same logic as in load_browsecomp_evaluations) + if args.start is not None and args.end is not None: + if args.start > args.end: + print(f"โŒ Invalid range: start ({args.start}) cannot be greater than end ({args.end})") + return 1 + if args.start < 1: + print(f"โŒ Invalid start: question numbers are 1-based, got {args.start}") + return 1 + if args.end > len(df): + print(f"โš ๏ธ End question {args.end} exceeds dataset size ({len(df)}), using {len(df)} instead") + args.end = len(df) + + print(f"๐Ÿ“‹ Showing questions {args.start} to {args.end}") + range_questions = list(range(args.start, args.end + 1)) + df_filtered = dataset.get_questions(indices=range_questions) + elif args.start is not None: + if args.start < 1: + print(f"โŒ Invalid start: question numbers are 1-based, got {args.start}") + return 1 + if args.start > len(df): + print(f"โŒ Start question {args.start} exceeds dataset size ({len(df)})") + return 1 + + print(f"๐Ÿ“‹ Showing questions from {args.start} to end") + range_questions = list(range(args.start, len(df) + 1)) + df_filtered = dataset.get_questions(indices=range_questions) + else: # args.end is not None + if args.end < 1: + print(f"โŒ Invalid end: question numbers are 1-based, got {args.end}") + return 1 + if args.end > len(df): + print(f"โš ๏ธ End 
question {args.end} exceeds dataset size ({len(df)}), using {len(df)} instead") + args.end = len(df) + + print(f"๐Ÿ“‹ Showing questions 1 to {args.end}") + range_questions = list(range(1, args.end + 1)) + df_filtered = dataset.get_questions(indices=range_questions) + + # Display filtered results + if not df_filtered.empty: + print("=" * 80) + for idx, row in df_filtered.iterrows(): + question_id = row.get('question_id', idx + 1) + question = row.get('question', row.get('problem_decrypted', '[Encrypted]')) + + if isinstance(question, str): + question_preview = question[:60] + "..." if len(question) > 60 else question + else: + question_preview = str(question)[:60] + "..." + + print(f"#{question_id:3d} {question_preview}") + + print(f"\nShowing {len(df_filtered)} question(s)") + else: + print("โŒ No questions found with the specified criteria") + else: + # Standard list mode + dataset.list_questions(limit=args.list_limit) + + return + + logger.info("๐Ÿš€ Starting Browsecomp Evaluation Server") + logger.info("=" * 60) + + # Validate arguments + if args.questions and (args.start is not None or args.end is not None): + print("โŒ Cannot use --questions together with --start/--end. Choose one approach.") + return 1 + + if args.limit and (args.start is not None or args.end is not None): + print("โŒ Cannot use --limit together with --start/--end. Choose one approach.") + return 1 + + # Load evaluations + evaluations = load_browsecomp_evaluations( + limit=args.limit, + questions=args.questions, + start=args.start, + end=args.end, + password=args.password + ) + + if not evaluations: + print("โŒ No evaluations loaded. 
Exiting.") + return 1 + + # Create evaluation stack and populate it + stack = EvaluationStack() + + print(f"\n๐Ÿ“š Loading {len(evaluations)} evaluations into stack...") + for evaluation in evaluations: + stack.push(evaluation) + + print(f"โœ… Stack loaded with {stack.size()} evaluations") + print(f"๐Ÿ” Top evaluation: {stack.peek()['name'] if stack.peek() else 'None'}") + + # Create server + server = EvalServer( + auth_key=args.auth_key, + host=args.host, + port=args.port, + log_level='INFO', + log_dir='./logs', + rpc_timeout=args.timeout, + ) + + # Track processed evaluations + completed_evaluations = [] + failed_evaluations = [] + client_evaluation_map = {} # client_id -> evaluation_id mapping + + print(f"\n๐ŸŒ Server Configuration:") + print(f" Host: {args.host}") + print(f" Port: {args.port}") + print(f" Auth Key: {args.auth_key}") + print(f" Timeout: {args.timeout}s ({args.timeout/60:.1f} minutes)") + print(f" Total Evaluations: {stack.size()}") + + @server.on_connect + async def handle_client(client): + logger.info(f'๐ŸŽ‰ CLIENT CONNECTED!') + logger.info(f' - Client ID: {client.id}') + logger.info(f' - Client tabId: {client.tab_id}') + logger.info(f' - Client info: {client.get_info()}') + + # Log structured client connection event + log_evaluation_event(logger, "client_connected", { + "client_id": client.id, + "tab_id": client.tab_id, + "client_info": client.get_info(), + "stack_remaining": stack.size() + }) + + # Check if we have evaluations left in the stack + if stack.is_empty(): + print('โš ๏ธ No more evaluations in stack for this client') + print(' All browsecomp questions have been distributed') + await client.send_message({ + "type": "no_evaluations", + "message": "All browsecomp questions have been distributed" + }) + return + + # Pop the next evaluation from the stack (ONE evaluation per client!) 
+ evaluation = stack.pop() + evaluation_id = evaluation['id'] + question_id = evaluation['metadata']['question_id'] + + print(f'๐Ÿ“‹ Assigning evaluation: "{evaluation["name"]}" (Question #{question_id})') + print(f'๐Ÿ“Š Remaining evaluations in stack: {stack.size()}') + + # Track which evaluation was sent to which client + client_evaluation_map[client.id] = evaluation_id + + # Log evaluation assignment + log_evaluation_event(logger, "evaluation_assigned", { + "client_id": client.id, + "evaluation_id": evaluation_id, + "question_id": question_id, + "evaluation_name": evaluation["name"], + "stack_remaining": stack.size(), + "true_answer": evaluation['metadata']['true_answer'] + }) + + try: + print(f'๐Ÿ”„ Starting evaluation... (timeout: {args.timeout}s)') + result = await client.evaluate(evaluation, timeout=args.timeout) + + print('โœ… Evaluation completed!') + + # Extract the true answer from evaluation metadata + true_answer = evaluation['metadata']['true_answer'] + + # Check if this is a partial result with errors + is_partial_result = (isinstance(result, dict) and + result.get('partial') and + result.get('error')) + + # Extract the actual response text from the structured format + response_text = extract_response_text(result) + + # Show structured response details if available + if isinstance(result, dict) and 'messages' in result: + message_count = len(result.get('messages', [])) + model_used = result.get('modelUsed', 'unknown') + execution_time = result.get('executionTime', 0) + tool_calls = len(result.get('toolCalls', [])) + print(f'๐Ÿ“Š Response structure: {message_count} messages, {tool_calls} tool calls, {model_used} model, {execution_time}ms') + else: + print(f'๐Ÿ“Š Response for "{evaluation["name"]}": {response_text[:100]}...') + + # Score the response + is_correct = question_scorer(response_text, true_answer) + extracted_answer = extract_answer(response_text) + confidence = extract_confidence(response_text) + + # Print scoring results + print(f'๐ŸŽฏ 
Scoring Results:') + print(f' - True Answer: {true_answer}') + print(f' - Extracted Answer: {extracted_answer}') + print(f' - Correct: {"โœ… YES" if is_correct else "โŒ NO"}') + print(f' - Confidence: {confidence}%') + + if is_partial_result: + print(f'โš ๏ธ Note: Result obtained after retries with errors:') + print(f' - Error: {result.get("error", "Unknown error")}') + print(f' - Attempts: {result.get("attempts", "Unknown")}') + print(f' - The BrowserOperator had issues but provided a response') + + # Log evaluation completion + log_evaluation_event(logger, "evaluation_completed", { + "client_id": client.id, + "evaluation_id": evaluation_id, + "question_id": question_id, + "evaluation_name": evaluation["name"], + "is_correct": is_correct, + "extracted_answer": extracted_answer, + "true_answer": true_answer, + "confidence": confidence, + "is_partial_result": is_partial_result, + "model_used": result.get('modelUsed') if isinstance(result, dict) else None, + "execution_time_ms": result.get('executionTime') if isinstance(result, dict) else None, + "tool_calls_count": len(result.get('toolCalls', [])) if isinstance(result, dict) else None + }) + + completed_evaluations.append({ + 'client_id': client.id, + 'evaluation': evaluation, + 'result': result, + 'question_id': question_id, + 'scoring': { + 'is_correct': is_correct, + 'true_answer': true_answer, + 'extracted_answer': extracted_answer, + 'confidence': confidence + }, + 'partial_result': is_partial_result, + 'execution_info': { + 'had_errors': is_partial_result, + 'error_message': result.get('error') if is_partial_result else None, + 'retry_attempts': result.get('attempts') if is_partial_result else 1, + 'model_used': result.get('modelUsed') if isinstance(result, dict) else None, + 'execution_time_ms': result.get('executionTime') if isinstance(result, dict) else None, + 'tool_calls_count': len(result.get('toolCalls', [])) if isinstance(result, dict) else None, + 'messages_count': len(result.get('messages', [])) if 
isinstance(result, dict) else None + } + }) + + except Exception as e: + error_msg = str(e) + print(f'โŒ Evaluation "{evaluation["name"]}" failed: {error_msg}') + + # Check if this is a tool execution error that might still be running + if "Tool execution failed" in error_msg or "-32000" in error_msg: + print(f'โš ๏ธ Note: BrowserOperator may still be processing this question') + print(f' The client reported an error but might continue execution') + print(f' Consider increasing timeout with --timeout parameter') + + # Log evaluation failure + log_evaluation_event(logger, "evaluation_failed", { + "client_id": client.id, + "evaluation_id": evaluation_id, + "question_id": question_id, + "evaluation_name": evaluation["name"], + "error_message": error_msg, + "is_tool_execution_error": "Tool execution failed" in error_msg or "-32000" in error_msg, + "true_answer": evaluation['metadata']['true_answer'] + }) + + failed_evaluations.append({ + 'client_id': client.id, + 'evaluation': evaluation, + 'error': error_msg, + 'question_id': question_id, + }) + + # Send completion message + try: + await client.send_message({ + "type": "evaluation_complete", + "evaluation_id": evaluation_id, + "evaluation_name": evaluation["name"], + "question_id": question_id, + "status": "completed" if evaluation_id not in [e['evaluation']['id'] for e in failed_evaluations] else "failed" + }) + except Exception as e: + print(f' โš ๏ธ Failed to send completion message: {e}') + + @server.on_disconnect + async def handle_disconnect(client_info): + client_id = client_info["id"] + print(f'\n๐Ÿ”Œ Client disconnected: {client_id}') + + # Show what evaluation this client was working on + evaluation_id = None + if client_id in client_evaluation_map: + evaluation_id = client_evaluation_map[client_id] + print(f' Was working on: {evaluation_id}') + + # Log client disconnect + log_evaluation_event(logger, "client_disconnected", { + "client_id": client_id, + "evaluation_id": evaluation_id, + "completed_count": 
len(completed_evaluations), + "failed_count": len(failed_evaluations), + "stack_remaining": stack.size() + }) + + # Show final statistics + total_completed = len(completed_evaluations) + total_failed = len(failed_evaluations) + remaining = stack.size() + total_original = len(evaluations) + + print(f'\n๐Ÿ“Š Current Statistics:') + print(f' โœ… Completed: {total_completed}/{total_original}') + print(f' โŒ Failed: {total_failed}/{total_original}') + print(f' ๐Ÿ“š Remaining: {remaining}/{total_original}') + print(f' ๐Ÿ”„ In Progress: {total_original - total_completed - total_failed - remaining}') + + # Calculate scoring statistics + if completed_evaluations: + correct_count = sum(1 for item in completed_evaluations if item.get('scoring', {}).get('is_correct', False)) + partial_count = sum(1 for item in completed_evaluations if item.get('partial_result', False)) + accuracy = correct_count / total_completed * 100 if total_completed > 0 else 0 + avg_confidence = sum(item.get('scoring', {}).get('confidence', 0) for item in completed_evaluations) / total_completed if total_completed > 0 else 0 + + print(f'\n๐ŸŽฏ Scoring Statistics:') + print(f' ๐Ÿ“Š Accuracy: {accuracy:.1f}% ({correct_count}/{total_completed} correct)') + print(f' ๐Ÿ’ก Average Confidence: {avg_confidence:.1f}%') + if partial_count > 0: + print(f' โš ๏ธ Partial Results: {partial_count}/{total_completed} had execution errors but recovered') + + if completed_evaluations: + print(f'\n๐ŸŽฏ Recently Completed Evaluations:') + for item in completed_evaluations[-3:]: # Show last 3 + eval_name = item['evaluation']['name'] + question_id = item['question_id'] + client_id_short = item['client_id'][:8] # Short client ID + is_correct = item.get('scoring', {}).get('is_correct', False) + confidence = item.get('scoring', {}).get('confidence', 0) + is_partial = item.get('partial_result', False) + status_emoji = 'โœ…' if is_correct else 'โŒ' + partial_indicator = 'โš ๏ธ' if is_partial else '' + print(f' โ€ข 
Q{question_id}: {eval_name} {status_emoji}{partial_indicator} (confidence: {confidence}%, client: {client_id_short})') + + if failed_evaluations: + print(f'\n๐Ÿ’ฅ Failed Evaluations:') + for item in failed_evaluations: + eval_name = item['evaluation']['name'] + question_id = item['question_id'] + error = item['error'] + print(f' โ€ข Q{question_id}: {eval_name} - {error}') + + # Start server + try: + print(f'\n๐Ÿš€ Starting server on ws://{server.config.host}:{server.config.port}') + print(' Connect your BrowserOperator to start processing browsecomp questions') + print(' Press Ctrl+C to stop the server') + print('=' * 60) + + await server.start() + + # Keep server running + await server.wait_closed() + + except KeyboardInterrupt: + print('\n๐Ÿ›‘ Received interrupt signal, stopping server...') + await server.stop() + print('โœ… Server stopped successfully') + + # Show final summary + total_completed = len(completed_evaluations) + total_failed = len(failed_evaluations) + total_processed = total_completed + total_failed + + if total_processed > 0: + print(f'\n๐Ÿ“ˆ Final Summary:') + print(f' Total processed: {total_processed}/{len(evaluations)}') + print(f' Success rate: {total_completed/total_processed*100:.1f}%') + print(f' Completed: {total_completed}') + print(f' Failed: {total_failed}') + + # Final scoring statistics + if completed_evaluations: + correct_count = sum(1 for item in completed_evaluations if item.get('scoring', {}).get('is_correct', False)) + accuracy = correct_count / total_completed * 100 if total_completed > 0 else 0 + avg_confidence = sum(item.get('scoring', {}).get('confidence', 0) for item in completed_evaluations) / total_completed if total_completed > 0 else 0 + + print(f'\n๐Ÿ† Final Scoring Results:') + print(f' ๐Ÿ“Š Overall Accuracy: {accuracy:.1f}% ({correct_count}/{total_completed} correct)') + print(f' ๐Ÿ’ก Average Confidence: {avg_confidence:.1f}%') + + # Show confidence correlation + correct_items = [item for item in 
completed_evaluations if item.get('scoring', {}).get('is_correct', False)] + incorrect_items = [item for item in completed_evaluations if not item.get('scoring', {}).get('is_correct', False)] + + if correct_items: + avg_conf_correct = sum(item.get('scoring', {}).get('confidence', 0) for item in correct_items) / len(correct_items) + print(f' โœ… Avg confidence when correct: {avg_conf_correct:.1f}%') + + if incorrect_items: + avg_conf_incorrect = sum(item.get('scoring', {}).get('confidence', 0) for item in incorrect_items) / len(incorrect_items) + print(f' โŒ Avg confidence when incorrect: {avg_conf_incorrect:.1f}%') + + # Save results to JSON file + if completed_evaluations and (args.save_results or total_completed == len(evaluations)): + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + results_file = f"browsecomp_eval_results_{timestamp}.json" + + results_data = { + "timestamp": timestamp, + "total_questions": len(evaluations), + "completed": total_completed, + "failed": total_failed, + "accuracy": accuracy, + "average_confidence": avg_confidence, + "evaluations": completed_evaluations + } + + with open(results_file, 'w') as f: + json.dump(results_data, f, indent=2) + + print(f'\n๐Ÿ’พ Results saved to: {results_file}') + + # Log final session summary + log_evaluation_event(logger, "session_completed", { + "total_questions": len(evaluations), + "completed": total_completed, + "failed": total_failed, + "accuracy": accuracy, + "average_confidence": avg_confidence, + "partial_results": partial_count, + "results_file": results_file if 'results_file' in locals() else None + }) + + except Exception as e: + logger.error(f'๐Ÿ’ฅ Server error: {e}') + log_evaluation_event(logger, "server_error", { + "error_message": str(e), + "completed_count": len(completed_evaluations), + "failed_count": len(failed_evaluations) + }) + + if server.is_running(): + await server.stop() + return 1 + + logger.info("โœ… Server session ended successfully") + return 0 + + +if __name__ == 
"__main__": + # Ensure logs directory exists + Path("./logs").mkdir(exist_ok=True) + + try: + exit_code = main() + sys.exit(exit_code) + except KeyboardInterrupt: + print('\n๐Ÿ‘‹ Goodbye!') + except Exception as e: + print(f'๐Ÿ’ฅ Fatal error: {e}') + sys.exit(1) \ No newline at end of file diff --git a/eval-server/python/evals/browsecomp_scorer.py b/eval-server/python/evals/browsecomp_scorer.py new file mode 100644 index 0000000..17561e6 --- /dev/null +++ b/eval-server/python/evals/browsecomp_scorer.py @@ -0,0 +1,328 @@ +#!/usr/bin/env python3 +""" +Browsecomp Benchmark Scorer +Implements the scoring logic for Browsecomp benchmark questions +""" + +import re +from typing import Union, Any, Optional, Dict + + +# Grader template based on the original Browsecomp evaluation +GRADER_TEMPLATE = """ +Judge whether the following [response] to [question] is correct or not based on the precise and unambiguous [correct_answer] below. + +[question]: {question} + +[response]: {response} + +Your judgement must be in the format and criteria specified below: + +extracted_final_answer: The final exact answer extracted from the [response]. Put the answer as 'None' if there is no exact, final answer to extract. + +[correct_answer]: {correct_answer} + +reasoning: Explain why the extracted_final_answer is correct or incorrect based on [correct_answer], focusing only on if there are meaningful differences between [correct_answer] and the extracted_final_answer. + +correct: Answer 'yes' if extracted_final_answer matches the [correct_answer] given above, or is within a small margin of error for numerical problems. Answer 'no' otherwise. + +confidence: The extracted confidence score between 0% and 100% from [response]. Put 100 if no confidence score available. 
+""".strip() + + +def extract_answer(response: str) -> str: + """Extract the exact answer from a response.""" + # Look for "Exact Answer:" pattern + patterns = [ + r'[Ee]xact [Aa]nswer:\s*([^\n]+)', + r'[Ff]inal [Aa]nswer:\s*([^\n]+)', + r'[Aa]nswer:\s*([^\n]+)', + ] + + for pattern in patterns: + match = re.search(pattern, response) + if match: + return match.group(1).strip() + + # If no pattern found, try to extract from the end of response + lines = response.strip().split('\n') + if lines: + # Check last few lines for answer-like content + for line in reversed(lines[-3:]): + line = line.strip() + if line and not line.startswith('[') and not line.startswith('Confidence'): + return line + + return "" + + +def extract_confidence(response: str) -> float: + """Extract confidence score from response.""" + patterns = [ + r'[Cc]onfidence\s*[Ss]core:\s*(\d+)%', + r'[Cc]onfidence:\s*(\d+)%', + r'(\d+)%\s*confident', + r'I am (\d+)% confident', + r'(\d+)%\s*confidence', + ] + + for pattern in patterns: + match = re.search(pattern, response) + if match: + return float(match.group(1)) + + return 100.0 # Default to 100% if not specified + + +def normalize_answer(answer: str) -> str: + """Normalize answer for comparison.""" + if not isinstance(answer, str): + answer = str(answer) + + # Convert to lowercase + answer = answer.lower().strip() + + # Remove common punctuation at the end + answer = answer.rstrip('.,!?;:') + + # Normalize whitespace + answer = ' '.join(answer.split()) + + return answer + + +def extract_number(text: str) -> Union[float, None]: + """Extract a number from text.""" + # Remove common separators and convert to standard format + text = text.replace(',', '') + + # Try to find numbers with various patterns + patterns = [ + r'[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?', # Scientific notation + r'[-+]?\d+\.?\d*', # Regular numbers + r'[-+]?\d+', # Integers + ] + + for pattern in patterns: + matches = re.findall(pattern, text) + if matches: + try: + # Return the first 
valid number + return float(matches[0]) + except ValueError: + continue + + return None + + +def compare_numeric_answers(pred: str, true: str, tolerance: float = 0.01) -> bool: + """Compare numeric answers with tolerance.""" + pred_num = extract_number(pred) + true_num = extract_number(true) + + if pred_num is None or true_num is None: + return False + + # Check relative tolerance for non-zero values + if true_num != 0: + relative_error = abs(pred_num - true_num) / abs(true_num) + return relative_error <= tolerance + else: + # For zero values, use absolute tolerance + return abs(pred_num - true_num) <= tolerance + + +def question_scorer(prediction: str, true_answer: str) -> bool: + """ + Score a prediction against the true answer. + Returns True if the prediction is considered correct. + + This is a simplified scorer for quick evaluation. + For production use, consider using grade_with_llm for more accurate grading. + """ + if not prediction or not true_answer: + return False + + # Extract the answer part from the prediction + extracted_answer = extract_answer(prediction) + if not extracted_answer: + extracted_answer = prediction + + # Normalize both answers + pred_norm = normalize_answer(extracted_answer) + true_norm = normalize_answer(true_answer) + + # Exact match after normalization + if pred_norm == true_norm: + return True + + # Check if the true answer is contained in the prediction + if true_norm in pred_norm: + return True + + # Check numeric answers + if any(char.isdigit() for char in true_answer): + if compare_numeric_answers(extracted_answer, true_answer): + return True + + # Check for common variations + # Handle yes/no answers + if true_norm in ['yes', 'no']: + if true_norm == 'yes' and pred_norm in ['yes', 'true', 'correct', 'affirmative']: + return True + if true_norm == 'no' and pred_norm in ['no', 'false', 'incorrect', 'negative']: + return True + + return False + + +def grade_with_llm(question: str, correct_answer: str, response: str, + 
grader_function: Optional[callable] = None) -> Dict[str, Any]: + """ + Grade a response using an LLM grader. + + Args: + question: The original question + correct_answer: The correct answer + response: The model's response + grader_function: Optional function to call the grader LLM + + Returns: + Dictionary with grading results + """ + if not grader_function: + # If no grader function provided, use simple scoring + is_correct = question_scorer(response, correct_answer) + confidence = extract_confidence(response) + + return { + 'is_correct': is_correct, + 'confidence': confidence, + 'reasoning': 'Graded using rule-based scorer', + 'extracted_answer': extract_answer(response) + } + + # Format the grading prompt + grader_prompt = GRADER_TEMPLATE.format( + question=question, + correct_answer=correct_answer, + response=response, + ) + + # Call the grader + grading_response = grader_function(grader_prompt) + + # Parse the grading response + is_correct = False + confidence = 100.0 + reasoning = "" + extracted_answer = "" + + # Look for patterns in grading response + correct_match = re.search(r"correct:\s*(yes|no)", grading_response.lower()) + if correct_match: + is_correct = correct_match.group(1) == "yes" + + confidence_match = re.search(r"confidence:\s*(\d+)", grading_response) + if confidence_match: + confidence = float(confidence_match.group(1)) + + reasoning_match = re.search(r"reasoning:\s*([^\n]+)", grading_response, re.IGNORECASE) + if reasoning_match: + reasoning = reasoning_match.group(1).strip() + + answer_match = re.search(r"extracted_final_answer:\s*([^\n]+)", grading_response, re.IGNORECASE) + if answer_match: + extracted_answer = answer_match.group(1).strip() + + return { + 'is_correct': is_correct, + 'confidence': confidence, + 'reasoning': reasoning, + 'extracted_answer': extracted_answer, + 'grader_response': grading_response + } + + +def evaluate_predictions(predictions: list, true_answers: list) -> dict: + """ + Evaluate a list of predictions against 
true answers. + Returns statistics about the evaluation. + """ + if len(predictions) != len(true_answers): + raise ValueError("Predictions and true answers must have the same length") + + results = { + 'total': len(predictions), + 'correct': 0, + 'incorrect': 0, + 'details': [], + 'average_confidence': 0.0 + } + + total_confidence = 0.0 + + for pred, true in zip(predictions, true_answers): + is_correct = question_scorer(pred, true) + confidence = extract_confidence(pred) + + results['details'].append({ + 'prediction': pred, + 'true_answer': true, + 'correct': is_correct, + 'confidence': confidence, + 'extracted_answer': extract_answer(pred) + }) + + if is_correct: + results['correct'] += 1 + else: + results['incorrect'] += 1 + + total_confidence += confidence + + results['accuracy'] = results['correct'] / results['total'] if results['total'] > 0 else 0 + results['average_confidence'] = total_confidence / results['total'] if results['total'] > 0 else 0 + + return results + + +# Example usage and tests +if __name__ == "__main__": + # Test cases + test_cases = [ + ( + "Explanation: I found that...\nExact Answer: Paris\nConfidence Score: 95%", + "Paris", + True + ), + ( + "The answer is 42", + "42", + True + ), + ( + "Exact Answer: Yes\nConfidence: 80%", + "yes", + True + ), + ( + "After browsing, I found the answer is 3.14159", + "3.14", + True + ), + ( + "The result is 99", + "100", + False + ), + ] + + print("Testing Browsecomp scorer:") + for pred, true, expected in test_cases: + result = question_scorer(pred, true) + extracted = extract_answer(pred) + confidence = extract_confidence(pred) + status = "โœ“" if result == expected else "โœ—" + print(f"{status} Pred: '{pred[:50]}...' 
| True: '{true}' | Correct: {result}") + print(f" Extracted: '{extracted}' | Confidence: {confidence}%") \ No newline at end of file diff --git a/eval-server/python/evals/run_browsecomp_eval_server.sh b/eval-server/python/evals/run_browsecomp_eval_server.sh new file mode 100755 index 0000000..e393dad --- /dev/null +++ b/eval-server/python/evals/run_browsecomp_eval_server.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# Wrapper script to run browsecomp eval server with proper dependencies + +# Get the directory of this script +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +EVAL_SERVER_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +# Change to eval-server python directory +cd "$EVAL_SERVER_DIR" + +# Run with uv, passing all arguments +uv run python evals/browsecomp_eval_server.py "$@" \ No newline at end of file diff --git a/eval-server/python/examples/__init__.py b/eval-server/python/examples/__init__.py new file mode 100644 index 0000000..4bb7da7 --- /dev/null +++ b/eval-server/python/examples/__init__.py @@ -0,0 +1,10 @@ +""" +Examples package for bo-eval-server. + +This package contains working examples demonstrating different uses of the evaluation server: +- basic_server: Simple WebSocket server setup +- with_stack: Using evaluation stack for queuing evaluations +- programmatic_evals: Advanced programmatic evaluation creation +""" + +__version__ = "1.0.0" \ No newline at end of file diff --git a/eval-server/python/examples/basic_server.py b/eval-server/python/examples/basic_server.py new file mode 100644 index 0000000..3a1f9b0 --- /dev/null +++ b/eval-server/python/examples/basic_server.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +""" +Basic EvalServer example - Simple WebSocket server setup. + +This example shows the minimal setup for a WebSocket evaluation server. 
+""" + +import asyncio +import sys +from pathlib import Path + +# Add src to path for local development +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from bo_eval_server import EvalServer + + +async def main(): + """Main example function for basic server setup.""" + # Create server with basic configuration + server = EvalServer( + auth_key='hello', + host='127.0.0.1', + port=8080, + log_level='DEBUG', + log_dir='./logs', # Optional: create logs directory + ) + + # Set up client connection handler + @server.on_connect + async def handle_client(client): + print(f'๐Ÿ”— Client connected: {client.id}') + print(f' Tab ID: {client.tab_id}') + print(f' Capabilities: {client.capabilities}') + + # Send EXACTLY the same evaluation as NodeJS library-usage.js + try: + print('๐Ÿ”„ Starting evaluation...') + response = await client.evaluate({ + "id": "test_eval", + "name": "Capital of France", + "description": "Simple test evaluation", + "tool": "chat", + "input": { + "message": "What is the capital of France?" + } + }) + + print('โœ… Evaluation completed!') + print(f'๐Ÿ“Š Response: {response}') + + except Exception as e: + print(f'โŒ Evaluation failed: {e}') + + # Send a custom message + try: + await client.send_message({ + "type": "info", + "message": "Evaluation completed successfully!" 
+ }) + except Exception as e: + print(f'โš ๏ธ Failed to send message: {e}') + + # Set up client disconnection handler + @server.on_disconnect + async def handle_disconnect(client_info): + print(f'๐Ÿ”Œ Client disconnected: {client_info["id"]}') + print(f' Connection duration: {client_info.get("duration", "unknown")}s') + + # Start the server + try: + await server.start() + print(f'๐Ÿš€ Server running on ws://{server.config.host}:{server.config.port}') + print(' Press Ctrl+C to stop the server') + + # Keep server running + await server.wait_closed() + + except KeyboardInterrupt: + print('\n๐Ÿ›‘ Received interrupt signal, stopping server...') + await server.stop() + print('โœ… Server stopped successfully') + + except Exception as e: + print(f'๐Ÿ’ฅ Server error: {e}') + if server.is_running(): + await server.stop() + + +if __name__ == "__main__": + # Check if logs directory exists, create if needed + Path("./logs").mkdir(exist_ok=True) + + try: + asyncio.run(main()) + except KeyboardInterrupt: + print('\n๐Ÿ‘‹ Goodbye!') + except Exception as e: + print(f'๐Ÿ’ฅ Fatal error: {e}') + sys.exit(1) \ No newline at end of file diff --git a/eval-server/python/examples/logs/.gitignore b/eval-server/python/examples/logs/.gitignore new file mode 100644 index 0000000..326f777 --- /dev/null +++ b/eval-server/python/examples/logs/.gitignore @@ -0,0 +1,2 @@ +*.log +*.jsonl \ No newline at end of file diff --git a/eval-server/python/examples/programmatic_evals.py b/eval-server/python/examples/programmatic_evals.py new file mode 100644 index 0000000..47e579d --- /dev/null +++ b/eval-server/python/examples/programmatic_evals.py @@ -0,0 +1,428 @@ +#!/usr/bin/env python3 +""" +Programmatic evaluation creation example. + +This example demonstrates creating and customizing evaluations programmatically +in Python code, including dynamic evaluation generation and conditional logic. 
+""" + +import asyncio +import random +import sys +import time +from pathlib import Path +from typing import Dict, Any, List + +# Add src to path for local development +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from bo_eval_server import EvalServer, EvaluationStack + + +class EvaluationGenerator: + """Helper class for generating evaluations programmatically.""" + + def __init__(self): + self.counter = 0 + + def create_evaluation( + self, + name: str, + tool: str, + input_data: Dict[str, Any], + description: str = "", + metadata: Dict[str, Any] = None, + timeout: float = 30.0, + ) -> Dict[str, Any]: + """Create a standardized evaluation object.""" + self.counter += 1 + + return { + "id": f"generated_{self.counter:03d}_{int(time.time())}", + "name": name, + "description": description or f"Programmatically generated evaluation: {name}", + "tool": tool, + "input": input_data, + "timeout": timeout, + "metadata": { + "generated": True, + "timestamp": time.time(), + "generator": "programmatic_evals.py", + **(metadata or {}) + } + } + + def create_chat_evaluation( + self, + message: str, + name: str = None, + **kwargs + ) -> Dict[str, Any]: + """Create a chat-based evaluation.""" + return self.create_evaluation( + name=name or f"Chat: {message[:30]}...", + tool="chat", + input_data={"message": message}, + **kwargs + ) + + def create_action_evaluation( + self, + objective: str, + url: str = None, + name: str = None, + **kwargs + ) -> Dict[str, Any]: + """Create an action-based evaluation.""" + input_data = {"objective": objective} + if url: + input_data["url"] = url + + return self.create_evaluation( + name=name or f"Action: {objective[:30]}...", + tool="action", + input_data=input_data, + **kwargs + ) + + def create_research_evaluation( + self, + query: str, + depth: str = "basic", + name: str = None, + **kwargs + ) -> Dict[str, Any]: + """Create a research-based evaluation.""" + return self.create_evaluation( + name=name or f"Research: 
{query[:30]}...", + tool="research", + input_data={ + "query": query, + "depth": depth, + }, + **kwargs + ) + + +def create_dynamic_evaluations(generator: EvaluationGenerator) -> List[Dict[str, Any]]: + """Create evaluations based on dynamic logic.""" + evaluations = [] + + # Math evaluations with increasing difficulty + for i in range(3): + if i == 0: + a, b = random.randint(1, 10), random.randint(1, 10) + op = "+" + difficulty = "easy" + elif i == 1: + a, b = random.randint(10, 50), random.randint(10, 50) + op = "*" + difficulty = "medium" + else: + a, b = random.randint(100, 1000), random.randint(2, 20) + op = "/" + difficulty = "hard" + + evaluation = generator.create_chat_evaluation( + message=f"Calculate: {a} {op} {b}", + name=f"Math {difficulty.title()} #{i+1}", + metadata={ + "category": "mathematics", + "difficulty": difficulty, + "numbers": [a, b], + "operation": op + } + ) + evaluations.append(evaluation) + + # Conditional evaluations based on current time + current_hour = time.localtime().tm_hour + if 6 <= current_hour < 12: + time_context = "morning" + questions = [ + "What's a good breakfast recipe?", + "How can I boost my energy in the morning?", + ] + elif 12 <= current_hour < 18: + time_context = "afternoon" + questions = [ + "What's a healthy lunch option?", + "How can I stay productive in the afternoon?", + ] + else: + time_context = "evening" + questions = [ + "What's a good dinner recipe?", + "How can I relax in the evening?", + ] + + for i, question in enumerate(questions): + evaluation = generator.create_chat_evaluation( + message=question, + name=f"{time_context.title()} Question #{i+1}", + metadata={ + "category": "lifestyle", + "time_context": time_context, + "hour": current_hour + } + ) + evaluations.append(evaluation) + + # Generate research evaluations for trending topics + trending_topics = [ + "artificial intelligence trends 2024", + "sustainable energy solutions", + "space exploration recent developments", + ] + + for topic in 
trending_topics: + evaluation = generator.create_research_evaluation( + query=topic, + depth="detailed", + name=f"Research: {topic.title()}", + metadata={ + "category": "research", + "topic": topic, + "priority": "high" + }, + timeout=60.0 # Longer timeout for research + ) + evaluations.append(evaluation) + + return evaluations + + +async def main(): + """Main example function for programmatic evaluation creation.""" + print("๐Ÿญ Programmatic Evaluation Generation Example") + print("=" * 50) + + # Create evaluation generator + generator = EvaluationGenerator() + + # Create evaluation stack + stack = EvaluationStack() + + # Generate static evaluations + print("\n๐Ÿ“ Creating static evaluations...") + static_evals = [ + generator.create_chat_evaluation( + message="Explain quantum computing in simple terms", + name="Quantum Computing Explanation", + metadata={"category": "science", "complexity": "advanced"} + ), + generator.create_action_evaluation( + objective="Find and click the search button", + url="https://www.google.com", + name="Google Search Action", + metadata={"category": "web_automation", "site": "google"} + ), + generator.create_chat_evaluation( + message="Write a haiku about programming", + name="Programming Haiku", + metadata={"category": "creative", "format": "poetry"} + ), + ] + + for eval_obj in static_evals: + stack.push(eval_obj) + print(f" โž• {eval_obj['name']}") + + # Generate dynamic evaluations + print("\n๐ŸŽฒ Creating dynamic evaluations...") + dynamic_evals = create_dynamic_evaluations(generator) + + for eval_obj in dynamic_evals: + stack.push(eval_obj) + print(f" โž• {eval_obj['name']} (category: {eval_obj['metadata']['category']})") + + print(f"\n๐Ÿ“Š Total evaluations created: {stack.size()}") + + # Create server + server = EvalServer( + auth_key='programmatic-demo', + host='127.0.0.1', + port=8080, + log_level='INFO', + log_dir='./logs', + max_concurrent_evaluations=5, # Allow more concurrent evaluations + ) + + # Track evaluation 
results with detailed analysis + results = { + 'completed': [], + 'failed': [], + 'by_category': {}, + 'by_difficulty': {}, + 'timing': [], + } + + @server.on_connect + async def handle_client(client): + print(f'\n๐Ÿ”— Client connected: {client.id}') + print(f' Processing {stack.size()} evaluations...') + + start_time = time.time() + processed = 0 + + while not stack.is_empty(): + evaluation = stack.pop() + if not evaluation: + break + + processed += 1 + eval_start = time.time() + + print(f'\n๐Ÿ“‹ [{processed}] {evaluation["name"]}') + print(f' Category: {evaluation["metadata"].get("category", "unknown")}') + print(f' Tool: {evaluation["tool"]}') + + try: + # Use concurrency-limited evaluation + result = await server.evaluate_with_concurrency_limit( + client, + evaluation, + timeout=evaluation.get("timeout", 30.0) + ) + + eval_duration = time.time() - eval_start + + # Record successful result + result_record = { + 'evaluation': evaluation, + 'result': result, + 'duration': eval_duration, + 'client_id': client.id, + 'timestamp': time.time(), + } + results['completed'].append(result_record) + + # Update category stats + category = evaluation["metadata"].get("category", "unknown") + if category not in results['by_category']: + results['by_category'][category] = {'completed': 0, 'failed': 0} + results['by_category'][category]['completed'] += 1 + + # Update difficulty stats + difficulty = evaluation["metadata"].get("difficulty", "unknown") + if difficulty not in results['by_difficulty']: + results['by_difficulty'][difficulty] = {'completed': 0, 'failed': 0} + results['by_difficulty'][difficulty]['completed'] += 1 + + # Record timing + results['timing'].append(eval_duration) + + print(f' โœ… Completed in {eval_duration:.2f}s') + + # Show preview of response + if "output" in result and "response" in result["output"]: + response = result["output"]["response"] + preview = response[:150] + "..." 
if len(response) > 150 else response + print(f' ๐Ÿ’ฌ "{preview}"') + + except Exception as e: + eval_duration = time.time() - eval_start + + # Record failed result + failure_record = { + 'evaluation': evaluation, + 'error': str(e), + 'duration': eval_duration, + 'client_id': client.id, + 'timestamp': time.time(), + } + results['failed'].append(failure_record) + + # Update stats + category = evaluation["metadata"].get("category", "unknown") + if category not in results['by_category']: + results['by_category'][category] = {'completed': 0, 'failed': 0} + results['by_category'][category]['failed'] += 1 + + difficulty = evaluation["metadata"].get("difficulty", "unknown") + if difficulty not in results['by_difficulty']: + results['by_difficulty'][difficulty] = {'completed': 0, 'failed': 0} + results['by_difficulty'][difficulty]['failed'] += 1 + + print(f' โŒ Failed after {eval_duration:.2f}s: {e}') + + total_duration = time.time() - start_time + print(f'\n๐Ÿ Batch completed in {total_duration:.2f}s') + print(f' Processed: {processed}') + print(f' Success rate: {len(results["completed"])/processed*100:.1f}%') + + # Send detailed completion message + await client.send_message({ + "type": "batch_analysis", + "total_processed": processed, + "completed": len(results['completed']), + "failed": len(results['failed']), + "duration": total_duration, + "average_eval_time": sum(results['timing']) / len(results['timing']) if results['timing'] else 0, + "categories": list(results['by_category'].keys()), + }) + + @server.on_disconnect + async def handle_disconnect(client_info): + print(f'\n๐Ÿ”Œ Client disconnected: {client_info["id"]}') + + # Show detailed analysis + total = len(results['completed']) + len(results['failed']) + if total > 0: + print(f'\n๐Ÿ“ˆ Final Analysis:') + print(f' Total evaluations: {total}') + print(f' Successful: {len(results["completed"])} ({len(results["completed"])/total*100:.1f}%)') + print(f' Failed: {len(results["failed"])} 
({len(results["failed"])/total*100:.1f}%)') + + if results['timing']: + avg_time = sum(results['timing']) / len(results['timing']) + min_time = min(results['timing']) + max_time = max(results['timing']) + print(f' Average time: {avg_time:.2f}s (min: {min_time:.2f}s, max: {max_time:.2f}s)') + + print(f'\n๐Ÿ“Š By Category:') + for category, stats in results['by_category'].items(): + total_cat = stats['completed'] + stats['failed'] + success_rate = stats['completed'] / total_cat * 100 if total_cat > 0 else 0 + print(f' {category}: {total_cat} total, {success_rate:.1f}% success') + + if any(results['by_difficulty'].values()): + print(f'\n๐ŸŽฏ By Difficulty:') + for difficulty, stats in results['by_difficulty'].items(): + if difficulty != "unknown": + total_diff = stats['completed'] + stats['failed'] + success_rate = stats['completed'] / total_diff * 100 if total_diff > 0 else 0 + print(f' {difficulty}: {total_diff} total, {success_rate:.1f}% success') + + # Start server + try: + await server.start() + print(f'\n๐Ÿš€ Server running on ws://{server.config.host}:{server.config.port}') + print(' Connect your agent client to start processing evaluations') + print(' Press Ctrl+C to stop the server') + + # Keep server running + await server.wait_closed() + + except KeyboardInterrupt: + print('\n๐Ÿ›‘ Received interrupt signal, stopping server...') + await server.stop() + print('โœ… Server stopped successfully') + + except Exception as e: + print(f'๐Ÿ’ฅ Server error: {e}') + if server.is_running(): + await server.stop() + + +if __name__ == "__main__": + # Ensure logs directory exists + Path("./logs").mkdir(exist_ok=True) + + try: + asyncio.run(main()) + except KeyboardInterrupt: + print('\n๐Ÿ‘‹ Goodbye!') + except Exception as e: + print(f'๐Ÿ’ฅ Fatal error: {e}') + sys.exit(1) \ No newline at end of file diff --git a/eval-server/python/examples/with_stack.py b/eval-server/python/examples/with_stack.py new file mode 100644 index 0000000..f4b5d20 --- /dev/null +++ 
def create_sample_evaluations():
    """Create evaluations matching NodeJS multiple-evals.js exactly."""
    # (id, name, description, chat message) tuples; every task uses the
    # "chat" tool, so the common structure is filled in by the comprehension.
    specs = [
        ("math_eval", "Basic Math Problem", "Simple arithmetic evaluation",
         "What is 15 * 7 + 23? Please show your calculation steps."),
        ("geography_eval", "Capital of France", "Geography knowledge test",
         "What is the capital of France?"),
        ("creative_eval", "Creative Writing", "Short creative writing task",
         "Write a two-sentence story about a robot discovering friendship."),
        ("tech_eval", "Technology Knowledge", "Basic technology concepts",
         "Explain what HTTP stands for and what it's used for in simple terms."),
    ]
    return [
        {
            "id": eval_id,
            "name": name,
            "description": description,
            "tool": "chat",
            "input": {"message": message},
        }
        for eval_id, name, description, message in specs
    ]
+ evaluation = stack.pop() + print(f'๐Ÿ“‹ Assigning evaluation: "{evaluation["name"]}" ({evaluation["id"]})') + print(f'๐Ÿ“Š Remaining evaluations in stack: {stack.size()}') + + try: + print('๐Ÿ”„ Starting evaluation...') + result = await client.evaluate(evaluation) + + print('โœ… Evaluation completed!') + print(f'๐Ÿ“Š Response for "{evaluation["name"]}": {result}') + + completed_evaluations.append({ + 'client_id': client.id, + 'evaluation': evaluation, + 'result': result, + }) + + except Exception as e: + print(f'โŒ Evaluation "{evaluation["name"]}" failed: {e}') + + failed_evaluations.append({ + 'client_id': client.id, + 'evaluation': evaluation, + 'error': str(e), + }) + + # Send completion message + try: + await client.send_message({ + "type": "evaluation_complete", + "evaluation_id": evaluation["id"], + "evaluation_name": evaluation["name"], + "status": "completed" if evaluation["id"] not in [e['evaluation']['id'] for e in failed_evaluations] else "failed" + }) + except Exception as e: + print(f' โš ๏ธ Failed to send completion message: {e}') + + @server.on_disconnect + async def handle_disconnect(client_info): + print(f'\n๐Ÿ”Œ Client disconnected: {client_info["id"]}') + + # Show final statistics + total_completed = len(completed_evaluations) + total_failed = len(failed_evaluations) + remaining = stack.size() + + print(f'\n๐Ÿ“Š Final Statistics:') + print(f' โœ… Completed: {total_completed}') + print(f' โŒ Failed: {total_failed}') + print(f' ๐Ÿ“š Remaining: {remaining}') + + if completed_evaluations: + print(f'\n๐ŸŽฏ Completed Evaluations:') + for item in completed_evaluations: + eval_name = item['evaluation']['name'] + client_id = item['client_id'][:8] # Short client ID + print(f' โ€ข {eval_name} (client: {client_id})') + + if failed_evaluations: + print(f'\n๐Ÿ’ฅ Failed Evaluations:') + for item in failed_evaluations: + eval_name = item['evaluation']['name'] + error = item['error'] + print(f' โ€ข {eval_name}: {error}') + + # Start server + try: + await 
server.start() + print(f'\n๐Ÿš€ Server running on ws://{server.config.host}:{server.config.port}') + print(' Connect your agent client to start processing evaluations') + print(' Press Ctrl+C to stop the server') + + # Keep server running + await server.wait_closed() + + except KeyboardInterrupt: + print('\n๐Ÿ›‘ Received interrupt signal, stopping server...') + await server.stop() + print('โœ… Server stopped successfully') + + except Exception as e: + print(f'๐Ÿ’ฅ Server error: {e}') + if server.is_running(): + await server.stop() + + +if __name__ == "__main__": + # Ensure logs directory exists + Path("./logs").mkdir(exist_ok=True) + + try: + asyncio.run(main()) + except KeyboardInterrupt: + print('\n๐Ÿ‘‹ Goodbye!') + except Exception as e: + print(f'๐Ÿ’ฅ Fatal error: {e}') + sys.exit(1) \ No newline at end of file diff --git a/eval-server/python/logs/.gitignore b/eval-server/python/logs/.gitignore new file mode 100644 index 0000000..326f777 --- /dev/null +++ b/eval-server/python/logs/.gitignore @@ -0,0 +1,2 @@ +*.log +*.jsonl \ No newline at end of file diff --git a/eval-server/python/pyproject.toml b/eval-server/python/pyproject.toml new file mode 100644 index 0000000..83d30ee --- /dev/null +++ b/eval-server/python/pyproject.toml @@ -0,0 +1,84 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "bo-eval-server" +version = "1.0.0" +description = "WebSocket server for evaluating LLM agents - Python implementation" +readme = "README.md" +license = {text = "MIT"} +authors = [ + {name = "Browser Operator Team"} +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + 
"Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: System :: Networking", +] +keywords = ["websocket", "llm", "evaluation", "rpc", "library", "programmatic"] +requires-python = ">=3.8" +dependencies = [ + "websockets>=11.0.0", + "loguru>=0.7.0", + "pandas>=2.0.0", + "requests>=2.31.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.0.0", + "pytest-asyncio>=0.21.0", + "black>=23.0.0", + "mypy>=1.0.0", +] + +[project.urls] +Homepage = "https://github.com/chromium/devtools-frontend" +Repository = "https://github.com/chromium/devtools-frontend" +Issues = "https://github.com/chromium/devtools-frontend/issues" + +[project.scripts] +bo-eval-basic = "scripts:run_basic_server" +bo-eval-stack = "scripts:run_with_stack" +bo-eval-programmatic = "scripts:run_programmatic_evals" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.package-data] +"*" = ["*.md", "*.txt", "*.yaml", "*.json"] + +[tool.black] +line-length = 88 +target-version = ['py38'] + +[tool.mypy] +python_version = "3.8" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] + +[dependency-groups] +dev = [ + "black>=24.8.0", + "mypy>=1.14.1", + "pytest>=8.3.5", + "pytest-asyncio>=0.24.0", +] diff --git a/eval-server/python/quick_test.py b/eval-server/python/quick_test.py new file mode 100644 index 0000000..5bf5b9a --- /dev/null +++ b/eval-server/python/quick_test.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +"""Quick test to see what's happening with the server.""" + +import asyncio +import json +import websockets + +async def test_server(): + print("๐Ÿ”— Testing server connection...") + try: + async with websockets.connect('ws://127.0.0.1:8080') as ws: + print("โœ… Connected to server") + + # Wait for welcome message + 
print("โณ Waiting for welcome message...") + welcome = await asyncio.wait_for(ws.recv(), timeout=5.0) + print(f"๐Ÿ“ฅ Welcome: {welcome}") + + # Send registration + registration = { + "type": "register", + "clientId": "test-client-123", + "secretKey": "hello", + "capabilities": ["chat"] + } + print(f"๐Ÿ“ค Sending registration: {json.dumps(registration)}") + await ws.send(json.dumps(registration)) + + # Wait for ack + print("โณ Waiting for registration ack...") + ack = await asyncio.wait_for(ws.recv(), timeout=5.0) + print(f"๐Ÿ“ฅ Registration ack: {ack}") + + except Exception as e: + print(f"โŒ Error: {e}") + +if __name__ == "__main__": + asyncio.run(test_server()) \ No newline at end of file diff --git a/eval-server/python/requirements.txt b/eval-server/python/requirements.txt new file mode 100644 index 0000000..e9fc8ca --- /dev/null +++ b/eval-server/python/requirements.txt @@ -0,0 +1,10 @@ +# Core dependencies +websockets>=11.0.0 +loguru>=0.7.0 + +# Development dependencies (optional) +# Install with: pip install -e ".[dev]" +# pytest>=7.0.0 +# pytest-asyncio>=0.21.0 +# black>=23.0.0 +# mypy>=1.0.0 \ No newline at end of file diff --git a/eval-server/python/run.py b/eval-server/python/run.py new file mode 100644 index 0000000..407cd68 --- /dev/null +++ b/eval-server/python/run.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +""" +Simple script runner for bo-eval-server examples. 
def run_with_uv(script_path: str, description: str):
    """Run a Python script using uv and return its exit code."""
    # Banner so the user sees which example is being launched.
    print(f"🚀 {description}")
    print(f"   Running: uv run python {script_path}")
    print("-" * 50)

    try:
        # Examples write their logs here; create it up front.
        Path("logs").mkdir(exist_ok=True)

        # Run relative to this file so the example paths resolve.
        completed = subprocess.run(
            ["uv", "run", "python", script_path],
            cwd=Path(__file__).parent,
        )
        return completed.returncode

    except KeyboardInterrupt:
        print("\n🛑 Interrupted by user")
        return 130  # conventional exit code for SIGINT
    except FileNotFoundError:
        print("❌ Error: 'uv' command not found. Please install uv first:")
        print("   curl -LsSf https://astral.sh/uv/install.sh | sh")
        return 1
    except Exception as e:
        print(f"💥 Error running script: {e}")
        return 1


def show_examples():
    """Show all available examples."""
    # Kept as data so the help text reads top-to-bottom in one place.
    help_lines = (
        "📚 Available Examples:",
        "",
        "🔧 basic - Basic WebSocket server setup",
        "   Simple server that connects to one client and runs a single evaluation",
        "",
        "📚 stack - Evaluation stack usage",
        "   Demonstrates LIFO queue for managing multiple evaluations",
        "",
        "🏭 prog - Programmatic evaluation creation",
        "   Advanced example with dynamic evaluation generation and analytics",
        "",
        "Usage:",
        "  python run.py basic",
        "  python run.py stack",
        "  python run.py prog",
        "",
        "Or with uv directly:",
        "  uv run python examples/basic_server.py",
        "  uv run python examples/with_stack.py",
        "  uv run python examples/programmatic_evals.py",
    )
    for line in help_lines:
        print(line)


def main():
    """Main entry point: dispatch the CLI command to the matching example."""
    if len(sys.argv) != 2:
        print("Usage: python run.py [basic|stack|prog|all]")
        print("       python run.py all    # Show all examples")
        sys.exit(1)

    command = sys.argv[1].lower()

    # Command -> (script path, human-readable description).
    examples = {
        "basic": ("examples/basic_server.py", "Basic WebSocket Server Example"),
        "stack": ("examples/with_stack.py", "Evaluation Stack Example"),
        "prog": ("examples/programmatic_evals.py", "Programmatic Evaluations Example"),
        "programmatic": ("examples/programmatic_evals.py", "Programmatic Evaluations Example"),
    }

    if command == "all":
        show_examples()
        return 0

    entry = examples.get(command)
    if entry is None:
        print(f"❌ Unknown command: {command}")
        print("Available commands: basic, stack, prog, all")
        return 1

    script_path, description = entry
    return run_with_uv(script_path, description)


if __name__ == "__main__":
    sys.exit(main())
+""" + +import asyncio +import sys +from pathlib import Path + +# Add the examples directory to path +examples_dir = Path(__file__).parent / "examples" +sys.path.insert(0, str(examples_dir)) + + +def run_basic_server(): + """Run the basic server example.""" + from examples.basic_server import main + try: + asyncio.run(main()) + except KeyboardInterrupt: + print('\n๐Ÿ‘‹ Goodbye!') + except Exception as e: + print(f'๐Ÿ’ฅ Error: {e}') + sys.exit(1) + + +def run_with_stack(): + """Run the evaluation stack example.""" + from examples.with_stack import main + try: + asyncio.run(main()) + except KeyboardInterrupt: + print('\n๐Ÿ‘‹ Goodbye!') + except Exception as e: + print(f'๐Ÿ’ฅ Error: {e}') + sys.exit(1) + + +def run_programmatic_evals(): + """Run the programmatic evaluations example.""" + from examples.programmatic_evals import main + try: + asyncio.run(main()) + except KeyboardInterrupt: + print('\n๐Ÿ‘‹ Goodbye!') + except Exception as e: + print(f'๐Ÿ’ฅ Error: {e}') + sys.exit(1) + + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: python scripts.py [basic|stack|programmatic]") + sys.exit(1) + + script = sys.argv[1] + if script == "basic": + run_basic_server() + elif script == "stack": + run_with_stack() + elif script == "programmatic": + run_programmatic_evals() + else: + print(f"Unknown script: {script}") + print("Available scripts: basic, stack, programmatic") + sys.exit(1) \ No newline at end of file diff --git a/eval-server/python/src/bo_eval_server/__init__.py b/eval-server/python/src/bo_eval_server/__init__.py new file mode 100644 index 0000000..3a8b6aa --- /dev/null +++ b/eval-server/python/src/bo_eval_server/__init__.py @@ -0,0 +1,29 @@ +""" +bo-eval-server: A minimal Python library for WebSocket-based LLM agent evaluation servers. 
+ +This package provides core functionality for: +- WebSocket server for agent connections +- JSON-RPC 2.0 bidirectional communication +- Evaluation stack for managing evaluation queues +- Enhanced logging and client management +""" + +from .eval_server import EvalServer +from .evaluation_stack import EvaluationStack +from .client_manager import ClientManager, ClientProxy +from .rpc_client import RpcClient +from .config import Config +from .logger import setup_logger + +__version__ = "1.0.0" +__author__ = "Browser Operator Team" + +__all__ = [ + "EvalServer", + "EvaluationStack", + "ClientManager", + "ClientProxy", + "RpcClient", + "Config", + "setup_logger", +] \ No newline at end of file diff --git a/eval-server/python/src/bo_eval_server/client_manager.py b/eval-server/python/src/bo_eval_server/client_manager.py new file mode 100644 index 0000000..280f676 --- /dev/null +++ b/eval-server/python/src/bo_eval_server/client_manager.py @@ -0,0 +1,401 @@ +""" +Client management for WebSocket connections. + +Handles client registration, authentication, and provides a proxy interface +for interacting with connected agents. +""" + +import asyncio +import json +import time +import uuid +from typing import Dict, Any, Optional, List, Callable, Awaitable + +import websockets +from loguru import logger + +from .rpc_client import RpcClient, RpcError, RpcTimeoutError +from .logger import log_connection, log_evaluation + + +class ClientProxy: + """Proxy object for interacting with a connected agent.""" + + def __init__( + self, + client_id: str, + websocket: websockets.WebSocketServerProtocol, + rpc_client: RpcClient, + tab_id: Optional[str] = None, + base_client_id: Optional[str] = None, + capabilities: Optional[List[str]] = None, + ): + """ + Initialize client proxy. 
+ + Args: + client_id: Unique client identifier + websocket: WebSocket connection + rpc_client: RPC client for method calls + tab_id: Browser tab ID (if applicable) + base_client_id: Base client ID for grouping + capabilities: List of agent capabilities + """ + self.id = client_id + self.tab_id = tab_id + self.base_client_id = base_client_id or client_id + self.capabilities = capabilities or [] + self._websocket = websocket + self._rpc_client = rpc_client + self._connected_at = time.time() + + async def evaluate( + self, + evaluation: Dict[str, Any], + timeout: Optional[float] = None, + ) -> Dict[str, Any]: + """ + Execute an evaluation on the connected agent. + + Args: + evaluation: Evaluation object with required fields + timeout: Optional timeout override + + Returns: + Evaluation result from the agent + + Raises: + ValueError: If evaluation is invalid + RpcError: If the RPC call fails + RpcTimeoutError: If the call times out + """ + # Validate evaluation object + required_fields = ['id', 'name', 'tool', 'input'] + for field in required_fields: + if field not in evaluation: + raise ValueError(f"Evaluation missing required field: {field}") + + evaluation_id = evaluation['id'] + start_time = time.time() + + try: + # Log evaluation start + log_evaluation( + evaluation_id=evaluation_id, + client_id=self.id, + status="started", + evaluation_name=evaluation.get('name'), + tool=evaluation.get('tool'), + ) + + # Make RPC call to agent + result = await self._rpc_client.call( + method="evaluate", + params=evaluation, + timeout=timeout, + client_id=self.id, + ) + + duration = time.time() - start_time + + # Log evaluation completion + log_evaluation( + evaluation_id=evaluation_id, + client_id=self.id, + status="completed", + duration=duration, + evaluation_name=evaluation.get('name'), + tool=evaluation.get('tool'), + ) + + return result + + except RpcTimeoutError: + duration = time.time() - start_time + log_evaluation( + evaluation_id=evaluation_id, + client_id=self.id, + 
status="timeout", + duration=duration, + evaluation_name=evaluation.get('name'), + tool=evaluation.get('tool'), + ) + raise + + except Exception as e: + duration = time.time() - start_time + log_evaluation( + evaluation_id=evaluation_id, + client_id=self.id, + status="failed", + duration=duration, + error=str(e), + evaluation_name=evaluation.get('name'), + tool=evaluation.get('tool'), + ) + raise + + async def send_message(self, message: Dict[str, Any]) -> None: + """ + Send a custom message to the connected agent. + + Args: + message: Message object to send + """ + try: + await self._websocket.send(json.dumps(message)) + except Exception as e: + logger.error(f"Failed to send message to client {self.id}: {e}") + raise + + def get_info(self) -> Dict[str, Any]: + """ + Get client information. + + Returns: + Dictionary with client details + """ + return { + 'id': self.id, + 'tab_id': self.tab_id, + 'base_client_id': self.base_client_id, + 'capabilities': self.capabilities, + 'connected_at': self._connected_at, + 'connected': self._rpc_client.is_connected(), + } + + def is_connected(self) -> bool: + """Check if the client is still connected.""" + return self._rpc_client.is_connected() + + def __repr__(self) -> str: + """String representation of the client proxy.""" + return f"ClientProxy(id={self.id}, connected={self.is_connected()})" + + +class ClientManager: + """Manages WebSocket client connections and authentication.""" + + def __init__(self, auth_key: str, rpc_timeout: float = 1500.0): + """ + Initialize client manager. 
+ + Args: + auth_key: Required authentication key for clients + rpc_timeout: Default RPC timeout in seconds + """ + self.auth_key = auth_key + self.rpc_timeout = rpc_timeout + self._clients: Dict[str, ClientProxy] = {} + self._pending_connections: Dict[str, Dict[str, Any]] = {} + + # Event handlers + self._on_connect_handler: Optional[Callable[[ClientProxy], Awaitable[None]]] = None + self._on_disconnect_handler: Optional[Callable[[Dict[str, Any]], Awaitable[None]]] = None + + def on_connect(self, handler: Callable[[ClientProxy], Awaitable[None]]) -> None: + """Set the handler for client connections.""" + self._on_connect_handler = handler + + def on_disconnect(self, handler: Callable[[Dict[str, Any]], Awaitable[None]]) -> None: + """Set the handler for client disconnections.""" + self._on_disconnect_handler = handler + + async def handle_connection(self, websocket: websockets.WebSocketServerProtocol) -> None: + """ + Handle a new WebSocket connection - matches NodeJS EvalServer flow. + + Args: + websocket: WebSocket connection + """ + connection_id = str(uuid.uuid4()) + client_proxy: Optional[ClientProxy] = None + + try: + # Send welcome message immediately (like NodeJS) + welcome_message = { + 'type': 'welcome', + 'serverId': 'python-eval-server-001', + 'version': '1.0.0', + 'timestamp': time.time() + } + logger.debug(f"Sending welcome message to connection {connection_id}") + await websocket.send(json.dumps(welcome_message)) + + # Wait for registration message + client_proxy = await self._authenticate_client(websocket, connection_id) + + if client_proxy: + # Start RPC client + await client_proxy._rpc_client.start() + + # Add to active clients + self._clients[client_proxy.id] = client_proxy + + # Call connection handler + if self._on_connect_handler: + await self._on_connect_handler(client_proxy) + + # Keep connection alive until closed + await client_proxy._rpc_client._message_handler_task + + except websockets.exceptions.ConnectionClosed: + 
logger.debug(f"WebSocket connection closed: {connection_id}") + except Exception as e: + logger.error(f"Error handling connection {connection_id}: {e}") + finally: + # Clean up on disconnect + if client_proxy: + await self._handle_disconnect(client_proxy) + + async def _authenticate_client( + self, + websocket: websockets.WebSocketServerProtocol, + connection_id: str, + ) -> Optional[ClientProxy]: + """Authenticate and register a client connection - matches NodeJS implementation.""" + try: + logger.debug(f"Waiting for registration message from connection {connection_id}") + # Wait for registration message with timeout + message = await asyncio.wait_for(websocket.recv(), timeout=30.0) + logger.debug(f"Received message from {connection_id}: {message}") + data = json.loads(message) + + if data.get('type') != 'register': + logger.warning(f"Invalid first message from {connection_id}: expected 'register', got '{data.get('type')}'") + await websocket.send(json.dumps({ + 'type': 'registration_ack', + 'status': 'rejected', + 'message': 'First message must be registration' + })) + return None + + # Auto-accept clients like NodeJS does (NodeJS auto-creates client configs) + # For simplicity, we'll accept any client with the correct secret key or no secret key + if 'secretKey' in data: + if data.get('secretKey') != self.auth_key: + logger.warning(f"Invalid auth key from {connection_id}: expected '{self.auth_key}', got '{data.get('secretKey')}'") + await websocket.send(json.dumps({ + 'type': 'registration_ack', + 'clientId': data.get('clientId', str(uuid.uuid4())), + 'status': 'rejected', + 'message': 'Invalid authentication key' + })) + return None + else: + logger.debug(f"Valid secret key provided by {connection_id}") + else: + logger.debug(f"No secret key provided by {connection_id}, accepting anyway") + + client_id = data.get('clientId', str(uuid.uuid4())) + tab_id = data.get('tabId') + base_client_id = data.get('baseClientId') + capabilities = data.get('capabilities', []) 
+ + logger.info(f"Registering client {client_id} from connection {connection_id}") + logger.debug(f"Client capabilities: {capabilities}") + + # Send registration acknowledgment + registration_response = { + 'type': 'registration_ack', + 'clientId': client_id, + 'status': 'accepted', + 'message': 'Client registered successfully' + } + logger.debug(f"Sending registration ack to {client_id}: {registration_response}") + await websocket.send(json.dumps(registration_response)) + + # Wait for ready signal + logger.debug(f"Waiting for ready signal from client {client_id}") + ready_message = await asyncio.wait_for(websocket.recv(), timeout=30.0) + logger.debug(f"Received ready message from {client_id}: {ready_message}") + ready_data = json.loads(ready_message) + + if ready_data.get('type') != 'ready': + logger.warning(f"Invalid ready message from {client_id}: expected 'ready', got '{ready_data.get('type')}'") + await websocket.send(json.dumps({ + 'type': 'error', + 'message': 'Expected ready signal after registration' + })) + return None + + logger.info(f"Client {client_id} is ready for evaluations") + + # Create RPC client and proxy + rpc_client = RpcClient(websocket, self.rpc_timeout) + client_proxy = ClientProxy( + client_id=client_id, + websocket=websocket, + rpc_client=rpc_client, + tab_id=tab_id, + base_client_id=base_client_id, + capabilities=capabilities, + ) + + # Log successful connection + log_connection( + event="connect", + client_id=client_id, + tab_id=tab_id, + base_client_id=base_client_id, + capabilities=capabilities, + ) + + return client_proxy + + except asyncio.TimeoutError: + logger.warning(f"Client registration timeout: {connection_id}") + return None + except json.JSONDecodeError: + logger.warning(f"Invalid JSON in registration: {connection_id}") + return None + except Exception as e: + logger.error(f"Error during client authentication: {e}") + return None + + async def _handle_disconnect(self, client_proxy: ClientProxy) -> None: + """Handle client 
disconnection cleanup.""" + client_id = client_proxy.id + + # Remove from active clients + self._clients.pop(client_id, None) + + # Stop RPC client + await client_proxy._rpc_client.stop() + + # Get client info for disconnect handler + client_info = client_proxy.get_info() + + # Log disconnection + log_connection( + event="disconnect", + client_id=client_id, + tab_id=client_proxy.tab_id, + base_client_id=client_proxy.base_client_id, + ) + + # Call disconnect handler + if self._on_disconnect_handler: + try: + await self._on_disconnect_handler(client_info) + except Exception as e: + logger.error(f"Error in disconnect handler: {e}") + + def get_clients(self) -> List[ClientProxy]: + """Get list of connected clients.""" + return list(self._clients.values()) + + def get_client(self, client_id: str) -> Optional[ClientProxy]: + """Get a specific client by ID.""" + return self._clients.get(client_id) + + def get_status(self) -> Dict[str, Any]: + """Get client manager status.""" + return { + 'connected_clients': len(self._clients), + 'client_ids': list(self._clients.keys()), + } + + def __repr__(self) -> str: + """String representation of the client manager.""" + return f"ClientManager(clients={len(self._clients)})" \ No newline at end of file diff --git a/eval-server/python/src/bo_eval_server/config.py b/eval-server/python/src/bo_eval_server/config.py new file mode 100644 index 0000000..46e72b9 --- /dev/null +++ b/eval-server/python/src/bo_eval_server/config.py @@ -0,0 +1,75 @@ +""" +Configuration management for bo-eval-server. + +Handles server configuration with environment variable support. 
+""" + +import os +from typing import Optional + + +class Config: + """Configuration class for EvalServer with environment variable support.""" + + def __init__( + self, + host: Optional[str] = None, + port: Optional[int] = None, + auth_key: Optional[str] = None, + log_level: Optional[str] = None, + rpc_timeout: Optional[float] = None, + max_concurrent_evaluations: Optional[int] = None, + ): + """ + Initialize configuration with optional overrides. + + Args: + host: Server host (default: localhost) + port: Server port (default: 8080) + auth_key: Authentication key for clients + log_level: Logging level (default: INFO) + rpc_timeout: RPC call timeout in seconds (default: 1500.0) + max_concurrent_evaluations: Max concurrent evaluations (default: 10) + """ + self.host = host or os.getenv('BO_EVAL_SERVER_HOST', 'localhost') + self.port = int(port or os.getenv('BO_EVAL_SERVER_PORT', '8080')) + self.auth_key = auth_key or os.getenv('BO_EVAL_SERVER_AUTH_KEY') + self.log_level = log_level or os.getenv('BO_EVAL_SERVER_LOG_LEVEL', 'INFO') + self.rpc_timeout = float( + rpc_timeout or os.getenv('BO_EVAL_SERVER_RPC_TIMEOUT', '1500.0') + ) + self.max_concurrent_evaluations = int( + max_concurrent_evaluations or + os.getenv('BO_EVAL_SERVER_MAX_CONCURRENT', '10') + ) + + def validate(self) -> None: + """Validate configuration parameters.""" + if not self.auth_key: + raise ValueError("auth_key is required for server authentication") + + if not isinstance(self.port, int) or self.port <= 0 or self.port > 65535: + raise ValueError(f"Invalid port: {self.port}") + + if self.rpc_timeout <= 0: + raise ValueError(f"Invalid RPC timeout: {self.rpc_timeout}") + + if self.max_concurrent_evaluations <= 0: + raise ValueError( + f"Invalid max_concurrent_evaluations: {self.max_concurrent_evaluations}" + ) + + def to_dict(self) -> dict: + """Convert configuration to dictionary.""" + return { + 'host': self.host, + 'port': self.port, + 'auth_key': '***' if self.auth_key else None, # Hide sensitive 
data + 'log_level': self.log_level, + 'rpc_timeout': self.rpc_timeout, + 'max_concurrent_evaluations': self.max_concurrent_evaluations, + } + + def __repr__(self) -> str: + """String representation of configuration.""" + return f"Config({self.to_dict()})" \ No newline at end of file diff --git a/eval-server/python/src/bo_eval_server/eval_server.py b/eval-server/python/src/bo_eval_server/eval_server.py new file mode 100644 index 0000000..9f6ccb7 --- /dev/null +++ b/eval-server/python/src/bo_eval_server/eval_server.py @@ -0,0 +1,292 @@ +""" +EvalServer - Main WebSocket server for LLM agent evaluations. + +A library-first evaluation server that accepts connections from AI agents, +sends them evaluation tasks via RPC calls, and collects their responses. +""" + +import asyncio +from typing import Dict, Any, Optional, Callable, Awaitable, List + +import websockets +from loguru import logger + +from .config import Config +from .client_manager import ClientManager, ClientProxy +from .logger import setup_logger, log_server_event + + +class EvalServer: + """ + Main evaluation server class for managing WebSocket connections and evaluations. + + Example usage: + ```python + server = EvalServer( + auth_key='your-secret-key', + host='127.0.0.1', + port=8080 + ) + + @server.on_connect + async def handle_client(client): + print(f'Client connected: {client.id}') + + result = await client.evaluate({ + "id": "test_eval", + "name": "Test Evaluation", + "tool": "chat", + "input": {"message": "Hello world"} + }) + + print(f'Response: {result}') + + await server.start() + await server.wait_closed() + ``` + """ + + def __init__( + self, + auth_key: str, + host: str = 'localhost', + port: int = 8080, + rpc_timeout: float = 1500.0, + log_level: str = 'INFO', + log_dir: Optional[str] = None, + max_concurrent_evaluations: int = 10, + ): + """ + Initialize the evaluation server. 
+ + Args: + auth_key: Required authentication key for client connections + host: Server host address + port: Server port number + rpc_timeout: Default timeout for RPC calls in seconds + log_level: Logging level (DEBUG, INFO, WARNING, ERROR) + log_dir: Directory for log files (optional) + max_concurrent_evaluations: Maximum concurrent evaluations + """ + # Create and validate configuration + self.config = Config( + host=host, + port=port, + auth_key=auth_key, + log_level=log_level, + rpc_timeout=rpc_timeout, + max_concurrent_evaluations=max_concurrent_evaluations, + ) + self.config.validate() + + # Setup logging + setup_logger( + log_level=self.config.log_level, + log_dir=log_dir, + ) + + # Initialize client manager + self.client_manager = ClientManager( + auth_key=self.config.auth_key, + rpc_timeout=self.config.rpc_timeout, + ) + + # Server state + self._server: Optional[websockets.WebSocketServer] = None + self._running = False + self._start_time: Optional[float] = None + + # Evaluation concurrency control + self._evaluation_semaphore = asyncio.Semaphore( + self.config.max_concurrent_evaluations + ) + + def on_connect(self, handler: Callable[[ClientProxy], Awaitable[None]]) -> Callable: + """ + Decorator to set the client connection handler. + + Args: + handler: Async function to call when a client connects + + Returns: + The handler function (for decorator use) + """ + self.client_manager.on_connect(handler) + return handler + + def on_disconnect(self, handler: Callable[[Dict[str, Any]], Awaitable[None]]) -> Callable: + """ + Decorator to set the client disconnection handler. + + Args: + handler: Async function to call when a client disconnects + + Returns: + The handler function (for decorator use) + """ + self.client_manager.on_disconnect(handler) + return handler + + async def start(self) -> None: + """ + Start the WebSocket server. 
+ + Raises: + RuntimeError: If server is already running + OSError: If unable to bind to the specified host/port + """ + if self._running: + raise RuntimeError("Server is already running") + + try: + logger.info(f"Starting EvalServer on {self.config.host}:{self.config.port}") + + # Start WebSocket server + self._server = await websockets.serve( + self.client_manager.handle_connection, + self.config.host, + self.config.port, + ping_interval=20, + ping_timeout=20, + close_timeout=10, + ) + + self._running = True + self._start_time = asyncio.get_event_loop().time() + + log_server_event( + event="start", + host=self.config.host, + port=self.config.port, + config=self.config.to_dict(), + ) + + logger.info(f"EvalServer started successfully on ws://{self.config.host}:{self.config.port}") + + except Exception as e: + logger.error(f"Failed to start server: {e}") + log_server_event(event="start_failed", error=str(e)) + raise + + async def stop(self) -> None: + """ + Stop the WebSocket server. + + Raises: + RuntimeError: If server is not running + """ + if not self._running: + raise RuntimeError("Server is not running") + + try: + logger.info("Stopping EvalServer...") + + if self._server: + self._server.close() + await self._server.wait_closed() + + self._running = False + self._start_time = None + + log_server_event(event="stop") + logger.info("EvalServer stopped successfully") + + except Exception as e: + logger.error(f"Error stopping server: {e}") + log_server_event(event="stop_failed", error=str(e)) + raise + + async def wait_closed(self) -> None: + """ + Wait for the server to be closed. + + This method blocks until the server is stopped, useful for keeping + the server running in the main program. + """ + if not self._running or not self._server: + return + + try: + await self._server.wait_closed() + except Exception as e: + logger.error(f"Error waiting for server closure: {e}") + + def get_status(self) -> Dict[str, Any]: + """ + Get server status information. 
+ + Returns: + Dictionary with server status details + """ + uptime = None + if self._running and self._start_time: + uptime = asyncio.get_event_loop().time() - self._start_time + + return { + 'running': self._running, + 'host': self.config.host, + 'port': self.config.port, + 'uptime': uptime, + 'config': self.config.to_dict(), + 'clients': self.client_manager.get_status(), + } + + def get_clients(self) -> List[ClientProxy]: + """ + Get list of connected clients. + + Returns: + List of ClientProxy objects + """ + return self.client_manager.get_clients() + + def get_client(self, client_id: str) -> Optional[ClientProxy]: + """ + Get a specific client by ID. + + Args: + client_id: Client identifier + + Returns: + ClientProxy object or None if not found + """ + return self.client_manager.get_client(client_id) + + async def evaluate_with_concurrency_limit( + self, + client: ClientProxy, + evaluation: Dict[str, Any], + timeout: Optional[float] = None, + ) -> Dict[str, Any]: + """ + Execute an evaluation with concurrency limiting. 
+ + Args: + client: Client to execute evaluation on + evaluation: Evaluation object + timeout: Optional timeout override + + Returns: + Evaluation result + """ + async with self._evaluation_semaphore: + return await client.evaluate(evaluation, timeout) + + def is_running(self) -> bool: + """Check if the server is currently running.""" + return self._running + + def __repr__(self) -> str: + """String representation of the server.""" + status = "running" if self._running else "stopped" + return f"EvalServer(status={status}, host={self.config.host}, port={self.config.port})" + + async def __aenter__(self): + """Async context manager entry.""" + await self.start() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Async context manager exit.""" + if self._running: + await self.stop() \ No newline at end of file diff --git a/eval-server/python/src/bo_eval_server/evaluation_stack.py b/eval-server/python/src/bo_eval_server/evaluation_stack.py new file mode 100644 index 0000000..1ad5078 --- /dev/null +++ b/eval-server/python/src/bo_eval_server/evaluation_stack.py @@ -0,0 +1,102 @@ +""" +EvaluationStack - A simple stack-like structure for managing evaluations. + +Provides LIFO (Last In, First Out) access to evaluation objects. +Useful for distributing different evaluations across multiple client connections. +""" + +from typing import Dict, Any, List, Optional + + +class EvaluationStack: + """A LIFO stack for managing evaluation objects.""" + + def __init__(self) -> None: + """Initialize an empty evaluation stack.""" + self._evaluations: List[Dict[str, Any]] = [] + + def push(self, evaluation: Dict[str, Any]) -> None: + """ + Add an evaluation to the top of the stack. 
+ + Args: + evaluation: The evaluation object to add + + Raises: + ValueError: If evaluation is invalid or missing required fields + """ + if not evaluation or not isinstance(evaluation, dict): + raise ValueError('Evaluation must be a valid dictionary') + + # Validate required fields + required_fields = ['id', 'name', 'tool', 'input'] + for field in required_fields: + if field not in evaluation or not evaluation[field]: + raise ValueError(f'Evaluation missing required field: {field}') + + self._evaluations.append(evaluation) + + def pop(self) -> Optional[Dict[str, Any]]: + """ + Remove and return the evaluation from the top of the stack. + + Returns: + The evaluation object, or None if stack is empty + """ + if self._evaluations: + return self._evaluations.pop() + return None + + def is_empty(self) -> bool: + """ + Check if the stack is empty. + + Returns: + True if stack has no evaluations + """ + return len(self._evaluations) == 0 + + def size(self) -> int: + """ + Get the number of evaluations in the stack. + + Returns: + The stack size + """ + return len(self._evaluations) + + def peek(self) -> Optional[Dict[str, Any]]: + """ + Peek at the top evaluation without removing it. + + Returns: + The top evaluation object, or None if stack is empty + """ + if self.is_empty(): + return None + return self._evaluations[-1] + + def clear(self) -> None: + """Clear all evaluations from the stack.""" + self._evaluations.clear() + + def to_array(self) -> List[Dict[str, Any]]: + """ + Get a copy of all evaluations in the stack (top to bottom). 
+ + Returns: + List of evaluation objects from top to bottom + """ + return list(reversed(self._evaluations)) + + def __len__(self) -> int: + """Return the number of evaluations in the stack.""" + return len(self._evaluations) + + def __bool__(self) -> bool: + """Return True if stack has evaluations.""" + return not self.is_empty() + + def __repr__(self) -> str: + """String representation of the stack.""" + return f"EvaluationStack(size={self.size()})" \ No newline at end of file diff --git a/eval-server/python/src/bo_eval_server/logger.py b/eval-server/python/src/bo_eval_server/logger.py new file mode 100644 index 0000000..8f6e3c5 --- /dev/null +++ b/eval-server/python/src/bo_eval_server/logger.py @@ -0,0 +1,180 @@ +""" +Enhanced logging setup for bo-eval-server using loguru. + +Provides structured logging with JSON formatting and multiple log levels. +""" + +import sys +from pathlib import Path +from typing import Optional, Dict, Any + +from loguru import logger + + +def setup_logger( + log_level: str = "INFO", + log_dir: Optional[str] = None, + enable_json: bool = True, +) -> None: + """ + Setup enhanced logging with loguru. 
+ + Args: + log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) + log_dir: Directory for log files (default: ./logs) + enable_json: Whether to use JSON formatting for structured logs + """ + # Remove default handler + logger.remove() + + # Console handler with colored output + logger.add( + sys.stdout, + level=log_level, + format="{time:YYYY-MM-DD HH:mm:ss} | " + "{level: <8} | " + "{name}:{function}:{line} - " + "{message}", + colorize=True, + ) + + # File handlers if log_dir is specified + if log_dir: + log_path = Path(log_dir) + log_path.mkdir(exist_ok=True) + + # Combined log file + logger.add( + log_path / "combined.log", + level="DEBUG", + format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function}:{line} - {message}", + rotation="10 MB", + retention="7 days", + ) + + # Error log file + logger.add( + log_path / "error.log", + level="ERROR", + format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function}:{line} - {message}", + rotation="10 MB", + retention="30 days", + ) + + # Structured JSON log for evaluations + if enable_json: + logger.add( + log_path / "evaluations.jsonl", + level="INFO", + format="{message}", + filter=lambda record: record["extra"].get("event_type") == "evaluation", + rotation="10 MB", + retention="30 days", + ) + + +def log_connection(event: str, client_id: str, **kwargs) -> None: + """ + Log connection events with structured data. + + Args: + event: Connection event type (connect, disconnect, ready) + client_id: Client identifier + **kwargs: Additional event data + """ + logger.bind(event_type="connection").info( + f"Connection {event}: {client_id}", + extra={ + "event_type": "connection", + "connection_event": event, + "client_id": client_id, + **kwargs, + } + ) + + +def log_evaluation( + evaluation_id: str, + client_id: str, + status: str, + duration: Optional[float] = None, + **kwargs +) -> None: + """ + Log evaluation events with structured data. 
+ + Args: + evaluation_id: Unique evaluation identifier + client_id: Client that handled the evaluation + status: Evaluation status (started, completed, failed, timeout) + duration: Evaluation duration in seconds + **kwargs: Additional evaluation data + """ + message = f"Evaluation {status}: {evaluation_id} (client: {client_id})" + if duration is not None: + message += f" ({duration:.2f}s)" + + log_data = { + "event_type": "evaluation", + "evaluation_id": evaluation_id, + "client_id": client_id, + "status": status, + "duration": duration, + **kwargs, + } + + logger.bind(event_type="evaluation").info(message, extra=log_data) + + +def log_rpc_call( + method: str, + client_id: str, + call_id: str, + status: str, + duration: Optional[float] = None, + **kwargs +) -> None: + """ + Log RPC call events with structured data. + + Args: + method: RPC method name + client_id: Target client identifier + call_id: RPC call identifier + status: Call status (sent, completed, failed, timeout) + duration: Call duration in seconds + **kwargs: Additional call data + """ + message = f"RPC {status}: {method} -> {client_id} (id: {call_id})" + if duration is not None: + message += f" ({duration:.2f}s)" + + log_data = { + "event_type": "rpc", + "method": method, + "client_id": client_id, + "call_id": call_id, + "status": status, + "duration": duration, + **kwargs, + } + + logger.bind(event_type="rpc").info(message, extra=log_data) + + +def log_server_event(event: str, **kwargs) -> None: + """ + Log server lifecycle events. 
+ + Args: + event: Server event type (start, stop, error) + **kwargs: Additional event data + """ + logger.bind(event_type="server").info( + f"Server {event}", + extra={ + "event_type": "server", + "server_event": event, + **kwargs, + } + ) \ No newline at end of file diff --git a/eval-server/python/src/bo_eval_server/rpc_client.py b/eval-server/python/src/bo_eval_server/rpc_client.py new file mode 100644 index 0000000..8fc024b --- /dev/null +++ b/eval-server/python/src/bo_eval_server/rpc_client.py @@ -0,0 +1,229 @@ +""" +JSON-RPC 2.0 client implementation for calling methods on connected agents. + +Handles request/response correlation, timeouts, and error conditions. +""" + +import asyncio +import json +import time +import uuid +from typing import Dict, Any, Optional, Callable, Awaitable + +import websockets +from loguru import logger + +from .logger import log_rpc_call + + +class RpcError(Exception): + """Exception raised for RPC-related errors.""" + pass + + +class RpcTimeoutError(RpcError): + """Exception raised when RPC call times out.""" + pass + + +class RpcClient: + """JSON-RPC 2.0 client for bidirectional communication with agents.""" + + def __init__(self, websocket: websockets.WebSocketServerProtocol, timeout: float = 1500.0): + """ + Initialize RPC client for a WebSocket connection. 
+ + Args: + websocket: WebSocket connection to the agent + timeout: Default timeout for RPC calls in seconds + """ + self.websocket = websocket + self.timeout = timeout + self._pending_calls: Dict[str, asyncio.Future] = {} + self._message_handler_task: Optional[asyncio.Task] = None + self._closed = False + + async def start(self) -> None: + """Start the RPC client message handler.""" + if self._message_handler_task is None: + self._message_handler_task = asyncio.create_task(self._handle_messages()) + + async def stop(self) -> None: + """Stop the RPC client and cancel pending calls.""" + self._closed = True + + # Cancel message handler + if self._message_handler_task: + self._message_handler_task.cancel() + try: + await self._message_handler_task + except asyncio.CancelledError: + pass + + # Cancel all pending calls + for future in self._pending_calls.values(): + if not future.done(): + future.cancel() + self._pending_calls.clear() + + async def call( + self, + method: str, + params: Optional[Dict[str, Any]] = None, + timeout: Optional[float] = None, + client_id: Optional[str] = None, + ) -> Any: + """ + Make an RPC call to the connected agent. 
+ + Args: + method: RPC method name to call + params: Parameters to pass to the method + timeout: Timeout for this call (uses default if None) + client_id: Client ID for logging purposes + + Returns: + The result returned by the agent + + Raises: + RpcError: If the call fails or returns an error + RpcTimeoutError: If the call times out + ConnectionError: If the WebSocket connection is closed + """ + if self._closed: + raise ConnectionError("RPC client is closed") + + call_id = str(uuid.uuid4()) + call_timeout = timeout or self.timeout + + # Create JSON-RPC 2.0 request + request = { + "jsonrpc": "2.0", + "method": method, + "params": params or {}, + "id": call_id, + } + + # Create future for response + future: asyncio.Future = asyncio.Future() + self._pending_calls[call_id] = future + + start_time = time.time() + + try: + # Log RPC call start + log_rpc_call( + method=method, + client_id=client_id or "unknown", + call_id=call_id, + status="sent", + params=params, + ) + + # Send request + await self.websocket.send(json.dumps(request)) + + # Wait for response with timeout + try: + result = await asyncio.wait_for(future, timeout=call_timeout) + duration = time.time() - start_time + + # Log successful completion + log_rpc_call( + method=method, + client_id=client_id or "unknown", + call_id=call_id, + status="completed", + duration=duration, + ) + + return result + + except asyncio.TimeoutError: + duration = time.time() - start_time + + # Log timeout + log_rpc_call( + method=method, + client_id=client_id or "unknown", + call_id=call_id, + status="timeout", + duration=duration, + ) + + raise RpcTimeoutError(f"RPC call '{method}' timed out after {call_timeout}s") + + except Exception as e: + duration = time.time() - start_time + + # Log failure + log_rpc_call( + method=method, + client_id=client_id or "unknown", + call_id=call_id, + status="failed", + duration=duration, + error=str(e), + ) + + raise + + finally: + # Clean up pending call + self._pending_calls.pop(call_id, 
None) + + async def _handle_messages(self) -> None: + """Handle incoming WebSocket messages and route RPC responses.""" + try: + async for message in self.websocket: + if self._closed: + break + + try: + await self._process_message(message) + except Exception as e: + logger.error(f"Error processing RPC message: {e}") + + except websockets.exceptions.ConnectionClosed: + logger.debug("WebSocket connection closed in RPC message handler") + except Exception as e: + logger.error(f"Error in RPC message handler: {e}") + finally: + await self.stop() + + async def _process_message(self, message: str) -> None: + """Process a single WebSocket message.""" + try: + data = json.loads(message) + except json.JSONDecodeError as e: + logger.warning(f"Invalid JSON in RPC message: {e}") + return + + # Handle JSON-RPC 2.0 responses + if isinstance(data, dict) and "jsonrpc" in data and "id" in data: + call_id = data["id"] + future = self._pending_calls.get(call_id) + + if future and not future.done(): + if "result" in data: + # Successful response + future.set_result(data["result"]) + elif "error" in data: + # Error response + error = data["error"] + error_msg = f"RPC error {error.get('code', 'unknown')}: {error.get('message', 'Unknown error')}" + future.set_exception(RpcError(error_msg)) + else: + # Invalid response format + future.set_exception(RpcError("Invalid RPC response format")) + else: + logger.warning(f"Received response for unknown or completed call: {call_id}") + + def is_connected(self) -> bool: + """Check if the RPC client is still active.""" + return not self._closed + + def __repr__(self) -> str: + """String representation of the RPC client.""" + status = "connected" if self.is_connected() else "closed" + return f"RpcClient(status={status}, pending_calls={len(self._pending_calls)})" \ No newline at end of file diff --git a/eval-server/python/test_client.py b/eval-server/python/test_client.py new file mode 100644 index 0000000..37f2520 --- /dev/null +++ 
b/eval-server/python/test_client.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 +""" +Test client for debugging connection issues with bo-eval-server. + +This client helps test the WebSocket connection and protocol implementation. +""" + +import asyncio +import json +import sys +import uuid +from pathlib import Path + +# Add src to path for development +sys.path.insert(0, str(Path(__file__).parent / "src")) + +try: + import websockets +except ImportError: + print("โŒ websockets not installed. Run: uv add websockets") + sys.exit(1) + + +class TestClient: + """Simple test client for debugging server connections.""" + + def __init__(self, server_url: str = "ws://127.0.0.1:8080", auth_key: str = "hello"): + self.server_url = server_url + self.auth_key = auth_key + self.client_id = str(uuid.uuid4()) + self.websocket = None + + async def connect_and_test(self): + """Connect to server and test the NodeJS-compatible protocol.""" + print(f"๐Ÿ”— Connecting to {self.server_url}") + print(f" Client ID: {self.client_id}") + print(f" Auth Key: {self.auth_key}") + + try: + # Connect to WebSocket + self.websocket = await websockets.connect( + self.server_url, + ping_interval=20, + ping_timeout=20, + close_timeout=10, + ) + print("โœ… WebSocket connection established") + + # Send registration message (NodeJS style) + registration = { + "type": "register", + "clientId": self.client_id, + "secretKey": self.auth_key, + "capabilities": ["chat", "action", "research"] + } + + print("๐Ÿ“ค Sending registration message:") + print(f" {json.dumps(registration, indent=2)}") + + await self.websocket.send(json.dumps(registration)) + + # Wait for registration acknowledgment + print("โณ Waiting for registration acknowledgment...") + response = await asyncio.wait_for(self.websocket.recv(), timeout=10.0) + response_data = json.loads(response) + + print("๐Ÿ“ฅ Received registration acknowledgment:") + print(f" {json.dumps(response_data, indent=2)}") + + if response_data.get("type") == 
"registration_ack" and response_data.get("status") == "accepted": + print("โœ… Registration successful!") + + # Send ready signal + ready_message = {"type": "ready"} + print("๐Ÿ“ค Sending ready signal:") + print(f" {json.dumps(ready_message, indent=2)}") + + await self.websocket.send(json.dumps(ready_message)) + print("โœ… Ready signal sent") + + # Listen for RPC calls + print("๐Ÿ‘‚ Listening for RPC calls...") + await self.listen_for_calls() + + elif response_data.get("type") == "error": + print(f"โŒ Registration failed: {response_data.get('message')}") + return False + else: + print(f"โ“ Unexpected response: {response_data}") + return False + + except asyncio.TimeoutError: + print("โฐ Timeout waiting for server response") + return False + except websockets.exceptions.ConnectionClosed as e: + print(f"๐Ÿ”Œ Connection closed: {e}") + return False + except Exception as e: + print(f"๐Ÿ’ฅ Error during connection: {e}") + return False + finally: + if self.websocket: + await self.websocket.close() + + return True + + async def listen_for_calls(self): + """Listen for RPC calls from the server.""" + try: + async for message in self.websocket: + print(f"\n๐Ÿ“ฅ Received message: {message}") + + try: + data = json.loads(message) + + if data.get("jsonrpc") == "2.0" and data.get("method") == "evaluate": + print("๐ŸŽฏ Received RPC evaluation request") + print(f" ID: {data.get('id')}") + print(f" Params: {json.dumps(data.get('params', {}), indent=2)}") + + # Send mock response + response = { + "jsonrpc": "2.0", + "id": data["id"], + "result": { + "status": "completed", + "output": { + "response": f"Mock response for evaluation {data['params'].get('name', 'unknown')}" + }, + "metadata": { + "client_id": self.client_id, + "test_client": True + } + } + } + + print("๐Ÿ“ค Sending mock response:") + print(f" {json.dumps(response, indent=2)}") + + await self.websocket.send(json.dumps(response)) + print("โœ… Mock response sent") + else: + print(f"โ“ Unknown message type: {data}") + + 
except json.JSONDecodeError as e: + print(f"โŒ Invalid JSON received: {e}") + + except websockets.exceptions.ConnectionClosed: + print("๐Ÿ”Œ Connection closed by server") + except Exception as e: + print(f"๐Ÿ’ฅ Error listening for calls: {e}") + + +async def main(): + """Main test function.""" + print("๐Ÿงช Test Client for bo-eval-server") + print("=" * 40) + + if len(sys.argv) > 1: + server_url = sys.argv[1] + else: + server_url = "ws://127.0.0.1:8080" + + if len(sys.argv) > 2: + auth_key = sys.argv[2] + else: + auth_key = "hello" # Default from examples + + client = TestClient(server_url, auth_key) + + try: + success = await client.connect_and_test() + if success: + print("\nโœ… Test completed successfully!") + else: + print("\nโŒ Test failed!") + sys.exit(1) + except KeyboardInterrupt: + print("\n๐Ÿ›‘ Test interrupted by user") + except Exception as e: + print(f"\n๐Ÿ’ฅ Test failed with error: {e}") + sys.exit(1) + + +if __name__ == "__main__": + print("Usage: python test_client.py [ws://server:port] [auth_key]") + print("Example: python test_client.py ws://127.0.0.1:8080 hello") + print() + + asyncio.run(main()) \ No newline at end of file diff --git a/eval-server/python/uv.lock b/eval-server/python/uv.lock new file mode 100644 index 0000000..2da9568 --- /dev/null +++ b/eval-server/python/uv.lock @@ -0,0 +1,1306 @@ +version = 1 +revision = 2 +requires-python = ">=3.8" +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", + "python_full_version < '3.9'", +] + +[[package]] +name = "backports-asyncio-runner" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/ff/70dca7d7cb1cbc0edb2c6cc0c38b65cba36cccc491eca64cabd5fe7f8670/backports_asyncio_runner-1.2.0.tar.gz", hash = "sha256:a5aa7b2b7d8f8bfcaa2b57313f70792df84e32a2a746f585213373f900b42162", size = 69893, upload-time = 
"2025-07-02T02:27:15.685Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/59/76ab57e3fe74484f48a53f8e337171b4a2349e506eabe136d7e01d059086/backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5", size = 12313, upload-time = "2025-07-02T02:27:14.263Z" }, +] + +[[package]] +name = "black" +version = "24.8.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "mypy-extensions", marker = "python_full_version < '3.9'" }, + { name = "packaging", marker = "python_full_version < '3.9'" }, + { name = "pathspec", marker = "python_full_version < '3.9'" }, + { name = "platformdirs", version = "4.3.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "tomli", marker = "python_full_version < '3.9'" }, + { name = "typing-extensions", version = "4.13.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/04/b0/46fb0d4e00372f4a86a6f8efa3cb193c9f64863615e39010b1477e010578/black-24.8.0.tar.gz", hash = "sha256:2500945420b6784c38b9ee885af039f5e7471ef284ab03fa35ecdde4688cd83f", size = 644810, upload-time = "2024-08-02T17:43:18.405Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/6e/74e29edf1fba3887ed7066930a87f698ffdcd52c5dbc263eabb06061672d/black-24.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:09cdeb74d494ec023ded657f7092ba518e8cf78fa8386155e4a03fdcc44679e6", size = 1632092, upload-time = "2024-08-02T17:47:26.911Z" }, + { url = "https://files.pythonhosted.org/packages/ab/49/575cb6c3faee690b05c9d11ee2e8dba8fbd6d6c134496e644c1feb1b47da/black-24.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:81c6742da39f33b08e791da38410f32e27d632260e599df7245cccee2064afeb", size = 1457529, upload-time = "2024-08-02T17:47:29.109Z" }, + { url = "https://files.pythonhosted.org/packages/7a/b4/d34099e95c437b53d01c4aa37cf93944b233066eb034ccf7897fa4e5f286/black-24.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:707a1ca89221bc8a1a64fb5e15ef39cd755633daa672a9db7498d1c19de66a42", size = 1757443, upload-time = "2024-08-02T17:46:20.306Z" }, + { url = "https://files.pythonhosted.org/packages/87/a0/6d2e4175ef364b8c4b64f8441ba041ed65c63ea1db2720d61494ac711c15/black-24.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d6417535d99c37cee4091a2f24eb2b6d5ec42b144d50f1f2e436d9fe1916fe1a", size = 1418012, upload-time = "2024-08-02T17:47:20.33Z" }, + { url = "https://files.pythonhosted.org/packages/08/a6/0a3aa89de9c283556146dc6dbda20cd63a9c94160a6fbdebaf0918e4a3e1/black-24.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fb6e2c0b86bbd43dee042e48059c9ad7830abd5c94b0bc518c0eeec57c3eddc1", size = 1615080, upload-time = "2024-08-02T17:48:05.467Z" }, + { url = "https://files.pythonhosted.org/packages/db/94/b803d810e14588bb297e565821a947c108390a079e21dbdcb9ab6956cd7a/black-24.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:837fd281f1908d0076844bc2b801ad2d369c78c45cf800cad7b61686051041af", size = 1438143, upload-time = "2024-08-02T17:47:30.247Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b5/f485e1bbe31f768e2e5210f52ea3f432256201289fd1a3c0afda693776b0/black-24.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:62e8730977f0b77998029da7971fa896ceefa2c4c4933fcd593fa599ecbf97a4", size = 1738774, upload-time = "2024-08-02T17:46:17.837Z" }, + { url = "https://files.pythonhosted.org/packages/a8/69/a000fc3736f89d1bdc7f4a879f8aaf516fb03613bb51a0154070383d95d9/black-24.8.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:72901b4913cbac8972ad911dc4098d5753704d1f3c56e44ae8dce99eecb0e3af", size = 1427503, upload-time = "2024-08-02T17:46:22.654Z" }, + { url = "https://files.pythonhosted.org/packages/a2/a8/05fb14195cfef32b7c8d4585a44b7499c2a4b205e1662c427b941ed87054/black-24.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7c046c1d1eeb7aea9335da62472481d3bbf3fd986e093cffd35f4385c94ae368", size = 1646132, upload-time = "2024-08-02T17:49:52.843Z" }, + { url = "https://files.pythonhosted.org/packages/41/77/8d9ce42673e5cb9988f6df73c1c5c1d4e9e788053cccd7f5fb14ef100982/black-24.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:649f6d84ccbae73ab767e206772cc2d7a393a001070a4c814a546afd0d423aed", size = 1448665, upload-time = "2024-08-02T17:47:54.479Z" }, + { url = "https://files.pythonhosted.org/packages/cc/94/eff1ddad2ce1d3cc26c162b3693043c6b6b575f538f602f26fe846dfdc75/black-24.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2b59b250fdba5f9a9cd9d0ece6e6d993d91ce877d121d161e4698af3eb9c1018", size = 1762458, upload-time = "2024-08-02T17:46:19.384Z" }, + { url = "https://files.pythonhosted.org/packages/28/ea/18b8d86a9ca19a6942e4e16759b2fa5fc02bbc0eb33c1b866fcd387640ab/black-24.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:6e55d30d44bed36593c3163b9bc63bf58b3b30e4611e4d88a0c3c239930ed5b2", size = 1436109, upload-time = "2024-08-02T17:46:52.97Z" }, + { url = "https://files.pythonhosted.org/packages/9f/d4/ae03761ddecc1a37d7e743b89cccbcf3317479ff4b88cfd8818079f890d0/black-24.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:505289f17ceda596658ae81b61ebbe2d9b25aa78067035184ed0a9d855d18afd", size = 1617322, upload-time = "2024-08-02T17:51:20.203Z" }, + { url = "https://files.pythonhosted.org/packages/14/4b/4dfe67eed7f9b1ddca2ec8e4418ea74f0d1dc84d36ea874d618ffa1af7d4/black-24.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b19c9ad992c7883ad84c9b22aaa73562a16b819c1d8db7a1a1a49fb7ec13c7d2", size = 1442108, upload-time = 
"2024-08-02T17:50:40.824Z" }, + { url = "https://files.pythonhosted.org/packages/97/14/95b3f91f857034686cae0e73006b8391d76a8142d339b42970eaaf0416ea/black-24.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f13f7f386f86f8121d76599114bb8c17b69d962137fc70efe56137727c7047e", size = 1745786, upload-time = "2024-08-02T17:46:02.939Z" }, + { url = "https://files.pythonhosted.org/packages/95/54/68b8883c8aa258a6dde958cd5bdfada8382bec47c5162f4a01e66d839af1/black-24.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:f490dbd59680d809ca31efdae20e634f3fae27fba3ce0ba3208333b713bc3920", size = 1426754, upload-time = "2024-08-02T17:46:38.603Z" }, + { url = "https://files.pythonhosted.org/packages/13/b2/b3f24fdbb46f0e7ef6238e131f13572ee8279b70f237f221dd168a9dba1a/black-24.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eab4dd44ce80dea27dc69db40dab62d4ca96112f87996bca68cd75639aeb2e4c", size = 1631706, upload-time = "2024-08-02T17:49:57.606Z" }, + { url = "https://files.pythonhosted.org/packages/d9/35/31010981e4a05202a84a3116423970fd1a59d2eda4ac0b3570fbb7029ddc/black-24.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3c4285573d4897a7610054af5a890bde7c65cb466040c5f0c8b732812d7f0e5e", size = 1457429, upload-time = "2024-08-02T17:49:12.764Z" }, + { url = "https://files.pythonhosted.org/packages/27/25/3f706b4f044dd569a20a4835c3b733dedea38d83d2ee0beb8178a6d44945/black-24.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e84e33b37be070ba135176c123ae52a51f82306def9f7d063ee302ecab2cf47", size = 1756488, upload-time = "2024-08-02T17:46:08.067Z" }, + { url = "https://files.pythonhosted.org/packages/63/72/79375cd8277cbf1c5670914e6bd4c1b15dea2c8f8e906dc21c448d0535f0/black-24.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:73bbf84ed136e45d451a260c6b73ed674652f90a2b3211d6a35e78054563a9bb", size = 1417721, upload-time = "2024-08-02T17:46:42.637Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/1e/83fa8a787180e1632c3d831f7e58994d7aaf23a0961320d21e84f922f919/black-24.8.0-py3-none-any.whl", hash = "sha256:972085c618ee94f402da1af548a4f218c754ea7e5dc70acb168bfaca4c2542ed", size = 206504, upload-time = "2024-08-02T17:43:15.747Z" }, +] + +[[package]] +name = "black" +version = "25.1.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "click", version = "8.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "mypy-extensions", marker = "python_full_version >= '3.9'" }, + { name = "packaging", marker = "python_full_version >= '3.9'" }, + { name = "pathspec", marker = "python_full_version >= '3.9'" }, + { name = "platformdirs", version = "4.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "tomli", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, + { name = "typing-extensions", version = "4.14.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/49/26a7b0f3f35da4b5a65f081943b7bcd22d7002f5f0fb8098ec1ff21cb6ef/black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", size = 649449, upload-time = "2025-01-29T04:15:40.373Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/3b/4ba3f93ac8d90410423fdd31d7541ada9bcee1df32fb90d26de41ed40e1d/black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32", size = 1629419, upload-time = "2025-01-29T05:37:06.642Z" }, + { url = "https://files.pythonhosted.org/packages/b4/02/0bde0485146a8a5e694daed47561785e8b77a0466ccc1f3e485d5ef2925e/black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da", size = 1461080, upload-time = "2025-01-29T05:37:09.321Z" }, + { url = "https://files.pythonhosted.org/packages/52/0e/abdf75183c830eaca7589144ff96d49bce73d7ec6ad12ef62185cc0f79a2/black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7", size = 1766886, upload-time = "2025-01-29T04:18:24.432Z" }, + { url = "https://files.pythonhosted.org/packages/dc/a6/97d8bb65b1d8a41f8a6736222ba0a334db7b7b77b8023ab4568288f23973/black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9", size = 1419404, upload-time = "2025-01-29T04:19:04.296Z" }, + { url = "https://files.pythonhosted.org/packages/7e/4f/87f596aca05c3ce5b94b8663dbfe242a12843caaa82dd3f85f1ffdc3f177/black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0", size = 1614372, upload-time = "2025-01-29T05:37:11.71Z" }, + { url = "https://files.pythonhosted.org/packages/e7/d0/2c34c36190b741c59c901e56ab7f6e54dad8df05a6272a9747ecef7c6036/black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299", size = 1442865, upload-time = "2025-01-29T05:37:14.309Z" }, + { url = "https://files.pythonhosted.org/packages/21/d4/7518c72262468430ead45cf22bd86c883a6448b9eb43672765d69a8f1248/black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096", size = 1749699, upload-time = "2025-01-29T04:18:17.688Z" }, + { url = "https://files.pythonhosted.org/packages/58/db/4f5beb989b547f79096e035c4981ceb36ac2b552d0ac5f2620e941501c99/black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2", size = 1428028, upload-time = "2025-01-29T04:18:51.711Z" }, + { url = "https://files.pythonhosted.org/packages/83/71/3fe4741df7adf015ad8dfa082dd36c94ca86bb21f25608eb247b4afb15b2/black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", size = 1650988, upload-time = "2025-01-29T05:37:16.707Z" }, + { url = "https://files.pythonhosted.org/packages/13/f3/89aac8a83d73937ccd39bbe8fc6ac8860c11cfa0af5b1c96d081facac844/black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", size = 1453985, upload-time = "2025-01-29T05:37:18.273Z" }, + { url = "https://files.pythonhosted.org/packages/6f/22/b99efca33f1f3a1d2552c714b1e1b5ae92efac6c43e790ad539a163d1754/black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", size = 1783816, upload-time = "2025-01-29T04:18:33.823Z" }, + { url = "https://files.pythonhosted.org/packages/18/7e/a27c3ad3822b6f2e0e00d63d58ff6299a99a5b3aee69fa77cd4b0076b261/black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", size = 1440860, upload-time = "2025-01-29T04:19:12.944Z" }, + { url = "https://files.pythonhosted.org/packages/98/87/0edf98916640efa5d0696e1abb0a8357b52e69e82322628f25bf14d263d1/black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f", size = 1650673, upload-time = 
"2025-01-29T05:37:20.574Z" }, + { url = "https://files.pythonhosted.org/packages/52/e5/f7bf17207cf87fa6e9b676576749c6b6ed0d70f179a3d812c997870291c3/black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3", size = 1453190, upload-time = "2025-01-29T05:37:22.106Z" }, + { url = "https://files.pythonhosted.org/packages/e3/ee/adda3d46d4a9120772fae6de454c8495603c37c4c3b9c60f25b1ab6401fe/black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171", size = 1782926, upload-time = "2025-01-29T04:18:58.564Z" }, + { url = "https://files.pythonhosted.org/packages/cc/64/94eb5f45dcb997d2082f097a3944cfc7fe87e071907f677e80788a2d7b7a/black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18", size = 1442613, upload-time = "2025-01-29T04:19:27.63Z" }, + { url = "https://files.pythonhosted.org/packages/d3/b6/ae7507470a4830dbbfe875c701e84a4a5fb9183d1497834871a715716a92/black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0", size = 1628593, upload-time = "2025-01-29T05:37:23.672Z" }, + { url = "https://files.pythonhosted.org/packages/24/c1/ae36fa59a59f9363017ed397750a0cd79a470490860bc7713967d89cdd31/black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f", size = 1460000, upload-time = "2025-01-29T05:37:25.829Z" }, + { url = "https://files.pythonhosted.org/packages/ac/b6/98f832e7a6c49aa3a464760c67c7856363aa644f2f3c74cf7d624168607e/black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e", size = 1765963, upload-time = "2025-01-29T04:18:38.116Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/e9/2cb0a017eb7024f70e0d2e9bdb8c5a5b078c5740c7f8816065d06f04c557/black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355", size = 1419419, upload-time = "2025-01-29T04:18:30.191Z" }, + { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646, upload-time = "2025-01-29T04:15:38.082Z" }, +] + +[[package]] +name = "bo-eval-server" +version = "1.0.0" +source = { editable = "." } +dependencies = [ + { name = "loguru" }, + { name = "pandas", version = "2.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "pandas", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "requests" }, + { name = "websockets", version = "13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "websockets", version = "15.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, +] + +[package.optional-dependencies] +dev = [ + { name = "black", version = "24.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "black", version = "25.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "mypy", version = "1.14.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "mypy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pytest", version = "8.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name 
= "pytest", version = "8.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pytest-asyncio", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "pytest-asyncio", version = "1.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, +] + +[package.dev-dependencies] +dev = [ + { name = "black", version = "24.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "black", version = "25.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "mypy", version = "1.14.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "mypy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pytest", version = "8.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "pytest", version = "8.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pytest-asyncio", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "pytest-asyncio", version = "1.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, +] + +[package.metadata] +requires-dist = [ + { name = "black", marker = "extra == 'dev'", specifier = ">=23.0.0" }, + { name = "loguru", specifier = ">=0.7.0" }, + { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.0.0" }, + { name = "pandas", specifier = ">=2.0.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.21.0" }, + { name = "requests", 
specifier = ">=2.31.0" }, + { name = "websockets", specifier = ">=11.0.0" }, +] +provides-extras = ["dev"] + +[package.metadata.requires-dev] +dev = [ + { name = "black", specifier = ">=24.8.0" }, + { name = "mypy", specifier = ">=1.14.1" }, + { name = "pytest", specifier = ">=8.3.5" }, + { name = "pytest-asyncio", specifier = ">=0.24.0" }, +] + +[[package]] +name = "certifi" +version = "2025.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386, upload-time = "2025-08-03T03:07:47.08Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367, upload-time = "2025-05-02T08:34:42.01Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/28/9901804da60055b406e1a1c5ba7aac1276fb77f1dde635aabfc7fd84b8ab/charset_normalizer-3.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c48ed483eb946e6c04ccbe02c6b4d1d48e51944b6db70f697e089c193404941", size = 201818, upload-time = "2025-05-02T08:31:46.725Z" }, + { url = 
"https://files.pythonhosted.org/packages/d9/9b/892a8c8af9110935e5adcbb06d9c6fe741b6bb02608c6513983048ba1a18/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2d318c11350e10662026ad0eb71bb51c7812fc8590825304ae0bdd4ac283acd", size = 144649, upload-time = "2025-05-02T08:31:48.889Z" }, + { url = "https://files.pythonhosted.org/packages/7b/a5/4179abd063ff6414223575e008593861d62abfc22455b5d1a44995b7c101/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9cbfacf36cb0ec2897ce0ebc5d08ca44213af24265bd56eca54bee7923c48fd6", size = 155045, upload-time = "2025-05-02T08:31:50.757Z" }, + { url = "https://files.pythonhosted.org/packages/3b/95/bc08c7dfeddd26b4be8c8287b9bb055716f31077c8b0ea1cd09553794665/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18dd2e350387c87dabe711b86f83c9c78af772c748904d372ade190b5c7c9d4d", size = 147356, upload-time = "2025-05-02T08:31:52.634Z" }, + { url = "https://files.pythonhosted.org/packages/a8/2d/7a5b635aa65284bf3eab7653e8b4151ab420ecbae918d3e359d1947b4d61/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8075c35cd58273fee266c58c0c9b670947c19df5fb98e7b66710e04ad4e9ff86", size = 149471, upload-time = "2025-05-02T08:31:56.207Z" }, + { url = "https://files.pythonhosted.org/packages/ae/38/51fc6ac74251fd331a8cfdb7ec57beba8c23fd5493f1050f71c87ef77ed0/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5bf4545e3b962767e5c06fe1738f951f77d27967cb2caa64c28be7c4563e162c", size = 151317, upload-time = "2025-05-02T08:31:57.613Z" }, + { url = "https://files.pythonhosted.org/packages/b7/17/edee1e32215ee6e9e46c3e482645b46575a44a2d72c7dfd49e49f60ce6bf/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:7a6ab32f7210554a96cd9e33abe3ddd86732beeafc7a28e9955cdf22ffadbab0", size = 146368, upload-time = "2025-05-02T08:31:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/26/2c/ea3e66f2b5f21fd00b2825c94cafb8c326ea6240cd80a91eb09e4a285830/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b33de11b92e9f75a2b545d6e9b6f37e398d86c3e9e9653c4864eb7e89c5773ef", size = 154491, upload-time = "2025-05-02T08:32:01.219Z" }, + { url = "https://files.pythonhosted.org/packages/52/47/7be7fa972422ad062e909fd62460d45c3ef4c141805b7078dbab15904ff7/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8755483f3c00d6c9a77f490c17e6ab0c8729e39e6390328e42521ef175380ae6", size = 157695, upload-time = "2025-05-02T08:32:03.045Z" }, + { url = "https://files.pythonhosted.org/packages/2f/42/9f02c194da282b2b340f28e5fb60762de1151387a36842a92b533685c61e/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:68a328e5f55ec37c57f19ebb1fdc56a248db2e3e9ad769919a58672958e8f366", size = 154849, upload-time = "2025-05-02T08:32:04.651Z" }, + { url = "https://files.pythonhosted.org/packages/67/44/89cacd6628f31fb0b63201a618049be4be2a7435a31b55b5eb1c3674547a/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:21b2899062867b0e1fde9b724f8aecb1af14f2778d69aacd1a5a1853a597a5db", size = 150091, upload-time = "2025-05-02T08:32:06.719Z" }, + { url = "https://files.pythonhosted.org/packages/1f/79/4b8da9f712bc079c0f16b6d67b099b0b8d808c2292c937f267d816ec5ecc/charset_normalizer-3.4.2-cp310-cp310-win32.whl", hash = "sha256:e8082b26888e2f8b36a042a58307d5b917ef2b1cacab921ad3323ef91901c71a", size = 98445, upload-time = "2025-05-02T08:32:08.66Z" }, + { url = "https://files.pythonhosted.org/packages/7d/d7/96970afb4fb66497a40761cdf7bd4f6fca0fc7bafde3a84f836c1f57a926/charset_normalizer-3.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:f69a27e45c43520f5487f27627059b64aaf160415589230992cec34c5e18a509", size = 105782, 
upload-time = "2025-05-02T08:32:10.46Z" }, + { url = "https://files.pythonhosted.org/packages/05/85/4c40d00dcc6284a1c1ad5de5e0996b06f39d8232f1031cd23c2f5c07ee86/charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:be1e352acbe3c78727a16a455126d9ff83ea2dfdcbc83148d2982305a04714c2", size = 198794, upload-time = "2025-05-02T08:32:11.945Z" }, + { url = "https://files.pythonhosted.org/packages/41/d9/7a6c0b9db952598e97e93cbdfcb91bacd89b9b88c7c983250a77c008703c/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa88ca0b1932e93f2d961bf3addbb2db902198dca337d88c89e1559e066e7645", size = 142846, upload-time = "2025-05-02T08:32:13.946Z" }, + { url = "https://files.pythonhosted.org/packages/66/82/a37989cda2ace7e37f36c1a8ed16c58cf48965a79c2142713244bf945c89/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d524ba3f1581b35c03cb42beebab4a13e6cdad7b36246bd22541fa585a56cccd", size = 153350, upload-time = "2025-05-02T08:32:15.873Z" }, + { url = "https://files.pythonhosted.org/packages/df/68/a576b31b694d07b53807269d05ec3f6f1093e9545e8607121995ba7a8313/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28a1005facc94196e1fb3e82a3d442a9d9110b8434fc1ded7a24a2983c9888d8", size = 145657, upload-time = "2025-05-02T08:32:17.283Z" }, + { url = "https://files.pythonhosted.org/packages/92/9b/ad67f03d74554bed3aefd56fe836e1623a50780f7c998d00ca128924a499/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f", size = 147260, upload-time = "2025-05-02T08:32:18.807Z" }, + { url = "https://files.pythonhosted.org/packages/a6/e6/8aebae25e328160b20e31a7e9929b1578bbdc7f42e66f46595a432f8539e/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:0f5d9ed7f254402c9e7d35d2f5972c9bbea9040e99cd2861bd77dc68263277c7", size = 149164, upload-time = "2025-05-02T08:32:20.333Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f2/b3c2f07dbcc248805f10e67a0262c93308cfa149a4cd3d1fe01f593e5fd2/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd387a49825780ff861998cd959767800d54f8308936b21025326de4b5a42b9", size = 144571, upload-time = "2025-05-02T08:32:21.86Z" }, + { url = "https://files.pythonhosted.org/packages/60/5b/c3f3a94bc345bc211622ea59b4bed9ae63c00920e2e8f11824aa5708e8b7/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f0aa37f3c979cf2546b73e8222bbfa3dc07a641585340179d768068e3455e544", size = 151952, upload-time = "2025-05-02T08:32:23.434Z" }, + { url = "https://files.pythonhosted.org/packages/e2/4d/ff460c8b474122334c2fa394a3f99a04cf11c646da895f81402ae54f5c42/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e70e990b2137b29dc5564715de1e12701815dacc1d056308e2b17e9095372a82", size = 155959, upload-time = "2025-05-02T08:32:24.993Z" }, + { url = "https://files.pythonhosted.org/packages/a2/2b/b964c6a2fda88611a1fe3d4c400d39c66a42d6c169c924818c848f922415/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0c8c57f84ccfc871a48a47321cfa49ae1df56cd1d965a09abe84066f6853b9c0", size = 153030, upload-time = "2025-05-02T08:32:26.435Z" }, + { url = "https://files.pythonhosted.org/packages/59/2e/d3b9811db26a5ebf444bc0fa4f4be5aa6d76fc6e1c0fd537b16c14e849b6/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6b66f92b17849b85cad91259efc341dce9c1af48e2173bf38a85c6329f1033e5", size = 148015, upload-time = "2025-05-02T08:32:28.376Z" }, + { url = "https://files.pythonhosted.org/packages/90/07/c5fd7c11eafd561bb51220d600a788f1c8d77c5eef37ee49454cc5c35575/charset_normalizer-3.4.2-cp311-cp311-win32.whl", hash = "sha256:daac4765328a919a805fa5e2720f3e94767abd632ae410a9062dff5412bae65a", 
size = 98106, upload-time = "2025-05-02T08:32:30.281Z" }, + { url = "https://files.pythonhosted.org/packages/a8/05/5e33dbef7e2f773d672b6d79f10ec633d4a71cd96db6673625838a4fd532/charset_normalizer-3.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:e53efc7c7cee4c1e70661e2e112ca46a575f90ed9ae3fef200f2a25e954f4b28", size = 105402, upload-time = "2025-05-02T08:32:32.191Z" }, + { url = "https://files.pythonhosted.org/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936, upload-time = "2025-05-02T08:32:33.712Z" }, + { url = "https://files.pythonhosted.org/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790, upload-time = "2025-05-02T08:32:35.768Z" }, + { url = "https://files.pythonhosted.org/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924, upload-time = "2025-05-02T08:32:37.284Z" }, + { url = "https://files.pythonhosted.org/packages/86/2d/fb55fdf41964ec782febbf33cb64be480a6b8f16ded2dbe8db27a405c09f/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214", size = 146626, upload-time = "2025-05-02T08:32:38.803Z" }, + { url = "https://files.pythonhosted.org/packages/8c/73/6ede2ec59bce19b3edf4209d70004253ec5f4e319f9a2e3f2f15601ed5f7/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a", size = 148567, upload-time = "2025-05-02T08:32:40.251Z" }, + { url = "https://files.pythonhosted.org/packages/09/14/957d03c6dc343c04904530b6bef4e5efae5ec7d7990a7cbb868e4595ee30/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd", size = 150957, upload-time = "2025-05-02T08:32:41.705Z" }, + { url = "https://files.pythonhosted.org/packages/0d/c8/8174d0e5c10ccebdcb1b53cc959591c4c722a3ad92461a273e86b9f5a302/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981", size = 145408, upload-time = "2025-05-02T08:32:43.709Z" }, + { url = "https://files.pythonhosted.org/packages/58/aa/8904b84bc8084ac19dc52feb4f5952c6df03ffb460a887b42615ee1382e8/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c", size = 153399, upload-time = "2025-05-02T08:32:46.197Z" }, + { url = "https://files.pythonhosted.org/packages/c2/26/89ee1f0e264d201cb65cf054aca6038c03b1a0c6b4ae998070392a3ce605/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b", size = 156815, upload-time = "2025-05-02T08:32:48.105Z" }, + { url = "https://files.pythonhosted.org/packages/fd/07/68e95b4b345bad3dbbd3a8681737b4338ff2c9df29856a6d6d23ac4c73cb/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d", size = 154537, upload-time = "2025-05-02T08:32:49.719Z" }, + { url = "https://files.pythonhosted.org/packages/77/1a/5eefc0ce04affb98af07bc05f3bac9094513c0e23b0562d64af46a06aae4/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f", size = 149565, upload-time = "2025-05-02T08:32:51.404Z" }, + { url = "https://files.pythonhosted.org/packages/37/a0/2410e5e6032a174c95e0806b1a6585eb21e12f445ebe239fac441995226a/charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c", size = 98357, upload-time = "2025-05-02T08:32:53.079Z" }, + { url = "https://files.pythonhosted.org/packages/6c/4f/c02d5c493967af3eda9c771ad4d2bbc8df6f99ddbeb37ceea6e8716a32bc/charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e", size = 105776, upload-time = "2025-05-02T08:32:54.573Z" }, + { url = "https://files.pythonhosted.org/packages/ea/12/a93df3366ed32db1d907d7593a94f1fe6293903e3e92967bebd6950ed12c/charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0", size = 199622, upload-time = "2025-05-02T08:32:56.363Z" }, + { url = "https://files.pythonhosted.org/packages/04/93/bf204e6f344c39d9937d3c13c8cd5bbfc266472e51fc8c07cb7f64fcd2de/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf", size = 143435, upload-time = "2025-05-02T08:32:58.551Z" }, + { url = "https://files.pythonhosted.org/packages/22/2a/ea8a2095b0bafa6c5b5a55ffdc2f924455233ee7b91c69b7edfcc9e02284/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e", size = 153653, upload-time = "2025-05-02T08:33:00.342Z" }, + { url = "https://files.pythonhosted.org/packages/b6/57/1b090ff183d13cef485dfbe272e2fe57622a76694061353c59da52c9a659/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1", size = 146231, upload-time = "2025-05-02T08:33:02.081Z" }, + { url = "https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c", size = 148243, upload-time = "2025-05-02T08:33:04.063Z" }, + { url = "https://files.pythonhosted.org/packages/c0/0f/9abe9bd191629c33e69e47c6ef45ef99773320e9ad8e9cb08b8ab4a8d4cb/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691", size = 150442, upload-time = "2025-05-02T08:33:06.418Z" }, + { url = "https://files.pythonhosted.org/packages/67/7c/a123bbcedca91d5916c056407f89a7f5e8fdfce12ba825d7d6b9954a1a3c/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0", size = 145147, upload-time = "2025-05-02T08:33:08.183Z" }, + { url = "https://files.pythonhosted.org/packages/ec/fe/1ac556fa4899d967b83e9893788e86b6af4d83e4726511eaaad035e36595/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b", size = 153057, upload-time = "2025-05-02T08:33:09.986Z" }, + { url = "https://files.pythonhosted.org/packages/2b/ff/acfc0b0a70b19e3e54febdd5301a98b72fa07635e56f24f60502e954c461/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff", size = 156454, upload-time = "2025-05-02T08:33:11.814Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/08/95b458ce9c740d0645feb0e96cea1f5ec946ea9c580a94adfe0b617f3573/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b", size = 154174, upload-time = "2025-05-02T08:33:13.707Z" }, + { url = "https://files.pythonhosted.org/packages/78/be/8392efc43487ac051eee6c36d5fbd63032d78f7728cb37aebcc98191f1ff/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148", size = 149166, upload-time = "2025-05-02T08:33:15.458Z" }, + { url = "https://files.pythonhosted.org/packages/44/96/392abd49b094d30b91d9fbda6a69519e95802250b777841cf3bda8fe136c/charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7", size = 98064, upload-time = "2025-05-02T08:33:17.06Z" }, + { url = "https://files.pythonhosted.org/packages/e9/b0/0200da600134e001d91851ddc797809e2fe0ea72de90e09bec5a2fbdaccb/charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980", size = 105641, upload-time = "2025-05-02T08:33:18.753Z" }, + { url = "https://files.pythonhosted.org/packages/4c/fd/f700cfd4ad876def96d2c769d8a32d808b12d1010b6003dc6639157f99ee/charset_normalizer-3.4.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76af085e67e56c8816c3ccf256ebd136def2ed9654525348cfa744b6802b69eb", size = 198257, upload-time = "2025-05-02T08:33:45.511Z" }, + { url = "https://files.pythonhosted.org/packages/3a/95/6eec4cbbbd119e6a402e3bfd16246785cc52ce64cf21af2ecdf7b3a08e91/charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e45ba65510e2647721e35323d6ef54c7974959f6081b58d4ef5d87c60c84919a", size = 143453, upload-time = "2025-05-02T08:33:47.463Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/b3/d4f913660383b3d93dbe6f687a312ea9f7e89879ae883c4e8942048174d4/charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:046595208aae0120559a67693ecc65dd75d46f7bf687f159127046628178dc45", size = 153130, upload-time = "2025-05-02T08:33:50.568Z" }, + { url = "https://files.pythonhosted.org/packages/e5/69/7540141529eabc55bf19cc05cd9b61c2078bebfcdbd3e799af99b777fc28/charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75d10d37a47afee94919c4fab4c22b9bc2a8bf7d4f46f87363bcf0573f3ff4f5", size = 145688, upload-time = "2025-05-02T08:33:52.828Z" }, + { url = "https://files.pythonhosted.org/packages/2e/bb/d76d3d6e340fb0967c43c564101e28a78c9a363ea62f736a68af59ee3683/charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6333b3aa5a12c26b2a4d4e7335a28f1475e0e5e17d69d55141ee3cab736f66d1", size = 147418, upload-time = "2025-05-02T08:33:54.718Z" }, + { url = "https://files.pythonhosted.org/packages/3e/ef/b7c1f39c0dc3808160c8b72e0209c2479393966313bfebc833533cfff9cc/charset_normalizer-3.4.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8323a9b031aa0393768b87f04b4164a40037fb2a3c11ac06a03ffecd3618027", size = 150066, upload-time = "2025-05-02T08:33:56.597Z" }, + { url = "https://files.pythonhosted.org/packages/20/26/4e47cc23d2a4a5eb6ed7d6f0f8cda87d753e2f8abc936d5cf5ad2aae8518/charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:24498ba8ed6c2e0b56d4acbf83f2d989720a93b41d712ebd4f4979660db4417b", size = 144499, upload-time = "2025-05-02T08:33:58.637Z" }, + { url = "https://files.pythonhosted.org/packages/d7/9c/efdf59dd46593cecad0548d36a702683a0bdc056793398a9cd1e1546ad21/charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:844da2b5728b5ce0e32d863af26f32b5ce61bc4273a9c720a9f3aa9df73b1455", size = 152954, 
upload-time = "2025-05-02T08:34:00.552Z" }, + { url = "https://files.pythonhosted.org/packages/59/b3/4e8b73f7299d9aaabd7cd26db4a765f741b8e57df97b034bb8de15609002/charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:65c981bdbd3f57670af8b59777cbfae75364b483fa8a9f420f08094531d54a01", size = 155876, upload-time = "2025-05-02T08:34:02.527Z" }, + { url = "https://files.pythonhosted.org/packages/53/cb/6fa0ccf941a069adce3edb8a1e430bc80e4929f4d43b5140fdf8628bdf7d/charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:3c21d4fca343c805a52c0c78edc01e3477f6dd1ad7c47653241cf2a206d4fc58", size = 153186, upload-time = "2025-05-02T08:34:04.481Z" }, + { url = "https://files.pythonhosted.org/packages/ac/c6/80b93fabc626b75b1665ffe405e28c3cef0aae9237c5c05f15955af4edd8/charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dc7039885fa1baf9be153a0626e337aa7ec8bf96b0128605fb0d77788ddc1681", size = 148007, upload-time = "2025-05-02T08:34:06.888Z" }, + { url = "https://files.pythonhosted.org/packages/41/eb/c7367ac326a2628e4f05b5c737c86fe4a8eb3ecc597a4243fc65720b3eeb/charset_normalizer-3.4.2-cp38-cp38-win32.whl", hash = "sha256:8272b73e1c5603666618805fe821edba66892e2870058c94c53147602eab29c7", size = 97923, upload-time = "2025-05-02T08:34:08.792Z" }, + { url = "https://files.pythonhosted.org/packages/7c/02/1c82646582ccf2c757fa6af69b1a3ea88744b8d2b4ab93b7686b2533e023/charset_normalizer-3.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:70f7172939fdf8790425ba31915bfbe8335030f05b9913d7ae00a87d4395620a", size = 105020, upload-time = "2025-05-02T08:34:10.6Z" }, + { url = "https://files.pythonhosted.org/packages/28/f8/dfb01ff6cc9af38552c69c9027501ff5a5117c4cc18dcd27cb5259fa1888/charset_normalizer-3.4.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:005fa3432484527f9732ebd315da8da8001593e2cf46a3d817669f062c3d9ed4", size = 201671, upload-time = "2025-05-02T08:34:12.696Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/fb/74e26ee556a9dbfe3bd264289b67be1e6d616329403036f6507bb9f3f29c/charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e92fca20c46e9f5e1bb485887d074918b13543b1c2a1185e69bb8d17ab6236a7", size = 144744, upload-time = "2025-05-02T08:34:14.665Z" }, + { url = "https://files.pythonhosted.org/packages/ad/06/8499ee5aa7addc6f6d72e068691826ff093329fe59891e83b092ae4c851c/charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50bf98d5e563b83cc29471fa114366e6806bc06bc7a25fd59641e41445327836", size = 154993, upload-time = "2025-05-02T08:34:17.134Z" }, + { url = "https://files.pythonhosted.org/packages/f1/a2/5e4c187680728219254ef107a6949c60ee0e9a916a5dadb148c7ae82459c/charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:721c76e84fe669be19c5791da68232ca2e05ba5185575086e384352e2c309597", size = 147382, upload-time = "2025-05-02T08:34:19.081Z" }, + { url = "https://files.pythonhosted.org/packages/4c/fe/56aca740dda674f0cc1ba1418c4d84534be51f639b5f98f538b332dc9a95/charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82d8fd25b7f4675d0c47cf95b594d4e7b158aca33b76aa63d07186e13c0e0ab7", size = 149536, upload-time = "2025-05-02T08:34:21.073Z" }, + { url = "https://files.pythonhosted.org/packages/53/13/db2e7779f892386b589173dd689c1b1e304621c5792046edd8a978cbf9e0/charset_normalizer-3.4.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3daeac64d5b371dea99714f08ffc2c208522ec6b06fbc7866a450dd446f5c0f", size = 151349, upload-time = "2025-05-02T08:34:23.193Z" }, + { url = "https://files.pythonhosted.org/packages/69/35/e52ab9a276186f729bce7a0638585d2982f50402046e4b0faa5d2c3ef2da/charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:dccab8d5fa1ef9bfba0590ecf4d46df048d18ffe3eec01eeb73a42e0d9e7a8ba", size = 146365, upload-time = "2025-05-02T08:34:25.187Z" }, + { url = "https://files.pythonhosted.org/packages/a6/d8/af7333f732fc2e7635867d56cb7c349c28c7094910c72267586947561b4b/charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:aaf27faa992bfee0264dc1f03f4c75e9fcdda66a519db6b957a3f826e285cf12", size = 154499, upload-time = "2025-05-02T08:34:27.359Z" }, + { url = "https://files.pythonhosted.org/packages/7a/3d/a5b2e48acef264d71e036ff30bcc49e51bde80219bb628ba3e00cf59baac/charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:eb30abc20df9ab0814b5a2524f23d75dcf83cde762c161917a2b4b7b55b1e518", size = 157735, upload-time = "2025-05-02T08:34:29.798Z" }, + { url = "https://files.pythonhosted.org/packages/85/d8/23e2c112532a29f3eef374375a8684a4f3b8e784f62b01da931186f43494/charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c72fbbe68c6f32f251bdc08b8611c7b3060612236e960ef848e0a517ddbe76c5", size = 154786, upload-time = "2025-05-02T08:34:31.858Z" }, + { url = "https://files.pythonhosted.org/packages/c7/57/93e0169f08ecc20fe82d12254a200dfaceddc1c12a4077bf454ecc597e33/charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:982bb1e8b4ffda883b3d0a521e23abcd6fd17418f6d2c4118d257a10199c0ce3", size = 150203, upload-time = "2025-05-02T08:34:33.88Z" }, + { url = "https://files.pythonhosted.org/packages/2c/9d/9bf2b005138e7e060d7ebdec7503d0ef3240141587651f4b445bdf7286c2/charset_normalizer-3.4.2-cp39-cp39-win32.whl", hash = "sha256:43e0933a0eff183ee85833f341ec567c0980dae57c464d8a508e1b2ceb336471", size = 98436, upload-time = "2025-05-02T08:34:35.907Z" }, + { url = "https://files.pythonhosted.org/packages/6d/24/5849d46cf4311bbf21b424c443b09b459f5b436b1558c04e45dbb7cc478b/charset_normalizer-3.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:d11b54acf878eef558599658b0ffca78138c8c3655cf4f3a4a673c437e67732e", size = 105772, upload-time = 
"2025-05-02T08:34:37.935Z" }, + { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload-time = "2025-05-02T08:34:40.053Z" }, +] + +[[package]] +name = "click" +version = "8.1.8" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", + "python_full_version < '3.9'", +] +dependencies = [ + { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593, upload-time = "2024-12-21T18:38:44.339Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188, upload-time = "2024-12-21T18:38:41.666Z" }, +] + +[[package]] +name = "click" +version = "8.2.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +dependencies = [ + { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342, upload-time = "2025-05-20T23:19:49.832Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215, upload-time = "2025-05-20T23:19:47.796Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "exceptiongroup" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", version = "4.13.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "typing-extensions", version = "4.14.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 
16674, upload-time = "2025-05-10T17:42:49.33Z" }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, +] + +[[package]] +name = "loguru" +version = "0.7.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "win32-setctime", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559, upload-time = "2024-12-06T11:20:56.608Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, +] + +[[package]] +name = "mypy" +version = "1.14.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "mypy-extensions", marker = "python_full_version < '3.9'" }, + { name = "tomli", marker = "python_full_version < '3.9'" }, + { name = "typing-extensions", version = "4.13.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/eb/2c92d8ea1e684440f54fa49ac5d9a5f19967b7b472a281f419e69a8d228e/mypy-1.14.1.tar.gz", hash = "sha256:7ec88144fe9b510e8475ec2f5f251992690fcf89ccb4500b214b4226abcd32d6", size = 3216051, upload-time = "2024-12-30T16:39:07.335Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/7a/87ae2adb31d68402da6da1e5f30c07ea6063e9f09b5e7cfc9dfa44075e74/mypy-1.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:52686e37cf13d559f668aa398dd7ddf1f92c5d613e4f8cb262be2fb4fedb0fcb", size = 11211002, upload-time = "2024-12-30T16:37:22.435Z" }, + { url = "https://files.pythonhosted.org/packages/e1/23/eada4c38608b444618a132be0d199b280049ded278b24cbb9d3fc59658e4/mypy-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1fb545ca340537d4b45d3eecdb3def05e913299ca72c290326be19b3804b39c0", size = 10358400, upload-time = "2024-12-30T16:37:53.526Z" }, + { url = "https://files.pythonhosted.org/packages/43/c9/d6785c6f66241c62fd2992b05057f404237deaad1566545e9f144ced07f5/mypy-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:90716d8b2d1f4cd503309788e51366f07c56635a3309b0f6a32547eaaa36a64d", size = 12095172, upload-time = 
"2024-12-30T16:37:50.332Z" }, + { url = "https://files.pythonhosted.org/packages/c3/62/daa7e787770c83c52ce2aaf1a111eae5893de9e004743f51bfcad9e487ec/mypy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ae753f5c9fef278bcf12e1a564351764f2a6da579d4a81347e1d5a15819997b", size = 12828732, upload-time = "2024-12-30T16:37:29.96Z" }, + { url = "https://files.pythonhosted.org/packages/1b/a2/5fb18318a3637f29f16f4e41340b795da14f4751ef4f51c99ff39ab62e52/mypy-1.14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e0fe0f5feaafcb04505bcf439e991c6d8f1bf8b15f12b05feeed96e9e7bf1427", size = 13012197, upload-time = "2024-12-30T16:38:05.037Z" }, + { url = "https://files.pythonhosted.org/packages/28/99/e153ce39105d164b5f02c06c35c7ba958aaff50a2babba7d080988b03fe7/mypy-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:7d54bd85b925e501c555a3227f3ec0cfc54ee8b6930bd6141ec872d1c572f81f", size = 9780836, upload-time = "2024-12-30T16:37:19.726Z" }, + { url = "https://files.pythonhosted.org/packages/da/11/a9422850fd506edbcdc7f6090682ecceaf1f87b9dd847f9df79942da8506/mypy-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f995e511de847791c3b11ed90084a7a0aafdc074ab88c5a9711622fe4751138c", size = 11120432, upload-time = "2024-12-30T16:37:11.533Z" }, + { url = "https://files.pythonhosted.org/packages/b6/9e/47e450fd39078d9c02d620545b2cb37993a8a8bdf7db3652ace2f80521ca/mypy-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d64169ec3b8461311f8ce2fd2eb5d33e2d0f2c7b49116259c51d0d96edee48d1", size = 10279515, upload-time = "2024-12-30T16:37:40.724Z" }, + { url = "https://files.pythonhosted.org/packages/01/b5/6c8d33bd0f851a7692a8bfe4ee75eb82b6983a3cf39e5e32a5d2a723f0c1/mypy-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba24549de7b89b6381b91fbc068d798192b1b5201987070319889e93038967a8", size = 12025791, upload-time = "2024-12-30T16:36:58.73Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/4c/e10e2c46ea37cab5c471d0ddaaa9a434dc1d28650078ac1b56c2d7b9b2e4/mypy-1.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:183cf0a45457d28ff9d758730cd0210419ac27d4d3f285beda038c9083363b1f", size = 12749203, upload-time = "2024-12-30T16:37:03.741Z" }, + { url = "https://files.pythonhosted.org/packages/88/55/beacb0c69beab2153a0f57671ec07861d27d735a0faff135a494cd4f5020/mypy-1.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f2a0ecc86378f45347f586e4163d1769dd81c5a223d577fe351f26b179e148b1", size = 12885900, upload-time = "2024-12-30T16:37:57.948Z" }, + { url = "https://files.pythonhosted.org/packages/a2/75/8c93ff7f315c4d086a2dfcde02f713004357d70a163eddb6c56a6a5eff40/mypy-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:ad3301ebebec9e8ee7135d8e3109ca76c23752bac1e717bc84cd3836b4bf3eae", size = 9777869, upload-time = "2024-12-30T16:37:33.428Z" }, + { url = "https://files.pythonhosted.org/packages/43/1b/b38c079609bb4627905b74fc6a49849835acf68547ac33d8ceb707de5f52/mypy-1.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:30ff5ef8519bbc2e18b3b54521ec319513a26f1bba19a7582e7b1f58a6e69f14", size = 11266668, upload-time = "2024-12-30T16:38:02.211Z" }, + { url = "https://files.pythonhosted.org/packages/6b/75/2ed0d2964c1ffc9971c729f7a544e9cd34b2cdabbe2d11afd148d7838aa2/mypy-1.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cb9f255c18052343c70234907e2e532bc7e55a62565d64536dbc7706a20b78b9", size = 10254060, upload-time = "2024-12-30T16:37:46.131Z" }, + { url = "https://files.pythonhosted.org/packages/a1/5f/7b8051552d4da3c51bbe8fcafffd76a6823779101a2b198d80886cd8f08e/mypy-1.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b4e3413e0bddea671012b063e27591b953d653209e7a4fa5e48759cda77ca11", size = 11933167, upload-time = "2024-12-30T16:37:43.534Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/90/f53971d3ac39d8b68bbaab9a4c6c58c8caa4d5fd3d587d16f5927eeeabe1/mypy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:553c293b1fbdebb6c3c4030589dab9fafb6dfa768995a453d8a5d3b23784af2e", size = 12864341, upload-time = "2024-12-30T16:37:36.249Z" }, + { url = "https://files.pythonhosted.org/packages/03/d2/8bc0aeaaf2e88c977db41583559319f1821c069e943ada2701e86d0430b7/mypy-1.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fad79bfe3b65fe6a1efaed97b445c3d37f7be9fdc348bdb2d7cac75579607c89", size = 12972991, upload-time = "2024-12-30T16:37:06.743Z" }, + { url = "https://files.pythonhosted.org/packages/6f/17/07815114b903b49b0f2cf7499f1c130e5aa459411596668267535fe9243c/mypy-1.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:8fa2220e54d2946e94ab6dbb3ba0a992795bd68b16dc852db33028df2b00191b", size = 9879016, upload-time = "2024-12-30T16:37:15.02Z" }, + { url = "https://files.pythonhosted.org/packages/9e/15/bb6a686901f59222275ab228453de741185f9d54fecbaacec041679496c6/mypy-1.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:92c3ed5afb06c3a8e188cb5da4984cab9ec9a77ba956ee419c68a388b4595255", size = 11252097, upload-time = "2024-12-30T16:37:25.144Z" }, + { url = "https://files.pythonhosted.org/packages/f8/b3/8b0f74dfd072c802b7fa368829defdf3ee1566ba74c32a2cb2403f68024c/mypy-1.14.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:dbec574648b3e25f43d23577309b16534431db4ddc09fda50841f1e34e64ed34", size = 10239728, upload-time = "2024-12-30T16:38:08.634Z" }, + { url = "https://files.pythonhosted.org/packages/c5/9b/4fd95ab20c52bb5b8c03cc49169be5905d931de17edfe4d9d2986800b52e/mypy-1.14.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8c6d94b16d62eb3e947281aa7347d78236688e21081f11de976376cf010eb31a", size = 11924965, upload-time = "2024-12-30T16:38:12.132Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/9d/4a236b9c57f5d8f08ed346914b3f091a62dd7e19336b2b2a0d85485f82ff/mypy-1.14.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d4b19b03fdf54f3c5b2fa474c56b4c13c9dbfb9a2db4370ede7ec11a2c5927d9", size = 12867660, upload-time = "2024-12-30T16:38:17.342Z" }, + { url = "https://files.pythonhosted.org/packages/40/88/a61a5497e2f68d9027de2bb139c7bb9abaeb1be1584649fa9d807f80a338/mypy-1.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0c911fde686394753fff899c409fd4e16e9b294c24bfd5e1ea4675deae1ac6fd", size = 12969198, upload-time = "2024-12-30T16:38:32.839Z" }, + { url = "https://files.pythonhosted.org/packages/54/da/3d6fc5d92d324701b0c23fb413c853892bfe0e1dbe06c9138037d459756b/mypy-1.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:8b21525cb51671219f5307be85f7e646a153e5acc656e5cebf64bfa076c50107", size = 9885276, upload-time = "2024-12-30T16:38:20.828Z" }, + { url = "https://files.pythonhosted.org/packages/39/02/1817328c1372be57c16148ce7d2bfcfa4a796bedaed897381b1aad9b267c/mypy-1.14.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7084fb8f1128c76cd9cf68fe5971b37072598e7c31b2f9f95586b65c741a9d31", size = 11143050, upload-time = "2024-12-30T16:38:29.743Z" }, + { url = "https://files.pythonhosted.org/packages/b9/07/99db9a95ece5e58eee1dd87ca456a7e7b5ced6798fd78182c59c35a7587b/mypy-1.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8f845a00b4f420f693f870eaee5f3e2692fa84cc8514496114649cfa8fd5e2c6", size = 10321087, upload-time = "2024-12-30T16:38:14.739Z" }, + { url = "https://files.pythonhosted.org/packages/9a/eb/85ea6086227b84bce79b3baf7f465b4732e0785830726ce4a51528173b71/mypy-1.14.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44bf464499f0e3a2d14d58b54674dee25c031703b2ffc35064bd0df2e0fac319", size = 12066766, upload-time = "2024-12-30T16:38:47.038Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/bb/f01bebf76811475d66359c259eabe40766d2f8ac8b8250d4e224bb6df379/mypy-1.14.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c99f27732c0b7dc847adb21c9d47ce57eb48fa33a17bc6d7d5c5e9f9e7ae5bac", size = 12787111, upload-time = "2024-12-30T16:39:02.444Z" }, + { url = "https://files.pythonhosted.org/packages/2f/c9/84837ff891edcb6dcc3c27d85ea52aab0c4a34740ff5f0ccc0eb87c56139/mypy-1.14.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:bce23c7377b43602baa0bd22ea3265c49b9ff0b76eb315d6c34721af4cdf1d9b", size = 12974331, upload-time = "2024-12-30T16:38:23.849Z" }, + { url = "https://files.pythonhosted.org/packages/84/5f/901e18464e6a13f8949b4909535be3fa7f823291b8ab4e4b36cfe57d6769/mypy-1.14.1-cp38-cp38-win_amd64.whl", hash = "sha256:8edc07eeade7ebc771ff9cf6b211b9a7d93687ff892150cb5692e4f4272b0837", size = 9763210, upload-time = "2024-12-30T16:38:36.299Z" }, + { url = "https://files.pythonhosted.org/packages/ca/1f/186d133ae2514633f8558e78cd658070ba686c0e9275c5a5c24a1e1f0d67/mypy-1.14.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3888a1816d69f7ab92092f785a462944b3ca16d7c470d564165fe703b0970c35", size = 11200493, upload-time = "2024-12-30T16:38:26.935Z" }, + { url = "https://files.pythonhosted.org/packages/af/fc/4842485d034e38a4646cccd1369f6b1ccd7bc86989c52770d75d719a9941/mypy-1.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:46c756a444117c43ee984bd055db99e498bc613a70bbbc120272bd13ca579fbc", size = 10357702, upload-time = "2024-12-30T16:38:50.623Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e6/457b83f2d701e23869cfec013a48a12638f75b9d37612a9ddf99072c1051/mypy-1.14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:27fc248022907e72abfd8e22ab1f10e903915ff69961174784a3900a8cba9ad9", size = 12091104, upload-time = "2024-12-30T16:38:53.735Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/bf/76a569158db678fee59f4fd30b8e7a0d75bcbaeef49edd882a0d63af6d66/mypy-1.14.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:499d6a72fb7e5de92218db961f1a66d5f11783f9ae549d214617edab5d4dbdbb", size = 12830167, upload-time = "2024-12-30T16:38:56.437Z" }, + { url = "https://files.pythonhosted.org/packages/43/bc/0bc6b694b3103de9fed61867f1c8bd33336b913d16831431e7cb48ef1c92/mypy-1.14.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:57961db9795eb566dc1d1b4e9139ebc4c6b0cb6e7254ecde69d1552bf7613f60", size = 13013834, upload-time = "2024-12-30T16:38:59.204Z" }, + { url = "https://files.pythonhosted.org/packages/b0/79/5f5ec47849b6df1e6943d5fd8e6632fbfc04b4fd4acfa5a5a9535d11b4e2/mypy-1.14.1-cp39-cp39-win_amd64.whl", hash = "sha256:07ba89fdcc9451f2ebb02853deb6aaaa3d2239a236669a63ab3801bbf923ef5c", size = 9781231, upload-time = "2024-12-30T16:39:05.124Z" }, + { url = "https://files.pythonhosted.org/packages/a0/b5/32dd67b69a16d088e533962e5044e51004176a9952419de0370cdaead0f8/mypy-1.14.1-py3-none-any.whl", hash = "sha256:b66a60cc4073aeb8ae00057f9c1f64d49e90f918fbcef9a977eb121da8b8f1d1", size = 2752905, upload-time = "2024-12-30T16:38:42.021Z" }, +] + +[[package]] +name = "mypy" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "mypy-extensions", marker = "python_full_version >= '3.9'" }, + { name = "pathspec", marker = "python_full_version >= '3.9'" }, + { name = "tomli", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, + { name = "typing-extensions", version = "4.14.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/8e/22/ea637422dedf0bf36f3ef238eab4e455e2a0dcc3082b5cc067615347ab8e/mypy-1.17.1.tar.gz", hash = "sha256:25e01ec741ab5bb3eec8ba9cdb0f769230368a22c959c4937360efb89b7e9f01", size = 3352570, upload-time = "2025-07-31T07:54:19.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/a9/3d7aa83955617cdf02f94e50aab5c830d205cfa4320cf124ff64acce3a8e/mypy-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3fbe6d5555bf608c47203baa3e72dbc6ec9965b3d7c318aa9a4ca76f465bd972", size = 11003299, upload-time = "2025-07-31T07:54:06.425Z" }, + { url = "https://files.pythonhosted.org/packages/83/e8/72e62ff837dd5caaac2b4a5c07ce769c8e808a00a65e5d8f94ea9c6f20ab/mypy-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80ef5c058b7bce08c83cac668158cb7edea692e458d21098c7d3bce35a5d43e7", size = 10125451, upload-time = "2025-07-31T07:53:52.974Z" }, + { url = "https://files.pythonhosted.org/packages/7d/10/f3f3543f6448db11881776f26a0ed079865926b0c841818ee22de2c6bbab/mypy-1.17.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4a580f8a70c69e4a75587bd925d298434057fe2a428faaf927ffe6e4b9a98df", size = 11916211, upload-time = "2025-07-31T07:53:18.879Z" }, + { url = "https://files.pythonhosted.org/packages/06/bf/63e83ed551282d67bb3f7fea2cd5561b08d2bb6eb287c096539feb5ddbc5/mypy-1.17.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd86bb649299f09d987a2eebb4d52d10603224500792e1bee18303bbcc1ce390", size = 12652687, upload-time = "2025-07-31T07:53:30.544Z" }, + { url = "https://files.pythonhosted.org/packages/69/66/68f2eeef11facf597143e85b694a161868b3b006a5fbad50e09ea117ef24/mypy-1.17.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a76906f26bd8d51ea9504966a9c25419f2e668f012e0bdf3da4ea1526c534d94", size = 12896322, upload-time = "2025-07-31T07:53:50.74Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/87/8e3e9c2c8bd0d7e071a89c71be28ad088aaecbadf0454f46a540bda7bca6/mypy-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:e79311f2d904ccb59787477b7bd5d26f3347789c06fcd7656fa500875290264b", size = 9507962, upload-time = "2025-07-31T07:53:08.431Z" }, + { url = "https://files.pythonhosted.org/packages/46/cf/eadc80c4e0a70db1c08921dcc220357ba8ab2faecb4392e3cebeb10edbfa/mypy-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad37544be07c5d7fba814eb370e006df58fed8ad1ef33ed1649cb1889ba6ff58", size = 10921009, upload-time = "2025-07-31T07:53:23.037Z" }, + { url = "https://files.pythonhosted.org/packages/5d/c1/c869d8c067829ad30d9bdae051046561552516cfb3a14f7f0347b7d973ee/mypy-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:064e2ff508e5464b4bd807a7c1625bc5047c5022b85c70f030680e18f37273a5", size = 10047482, upload-time = "2025-07-31T07:53:26.151Z" }, + { url = "https://files.pythonhosted.org/packages/98/b9/803672bab3fe03cee2e14786ca056efda4bb511ea02dadcedde6176d06d0/mypy-1.17.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70401bbabd2fa1aa7c43bb358f54037baf0586f41e83b0ae67dd0534fc64edfd", size = 11832883, upload-time = "2025-07-31T07:53:47.948Z" }, + { url = "https://files.pythonhosted.org/packages/88/fb/fcdac695beca66800918c18697b48833a9a6701de288452b6715a98cfee1/mypy-1.17.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e92bdc656b7757c438660f775f872a669b8ff374edc4d18277d86b63edba6b8b", size = 12566215, upload-time = "2025-07-31T07:54:04.031Z" }, + { url = "https://files.pythonhosted.org/packages/7f/37/a932da3d3dace99ee8eb2043b6ab03b6768c36eb29a02f98f46c18c0da0e/mypy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c1fdf4abb29ed1cb091cf432979e162c208a5ac676ce35010373ff29247bcad5", size = 12751956, upload-time = "2025-07-31T07:53:36.263Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/cf/6438a429e0f2f5cab8bc83e53dbebfa666476f40ee322e13cac5e64b79e7/mypy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:ff2933428516ab63f961644bc49bc4cbe42bbffb2cd3b71cc7277c07d16b1a8b", size = 9507307, upload-time = "2025-07-31T07:53:59.734Z" }, + { url = "https://files.pythonhosted.org/packages/17/a2/7034d0d61af8098ec47902108553122baa0f438df8a713be860f7407c9e6/mypy-1.17.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:69e83ea6553a3ba79c08c6e15dbd9bfa912ec1e493bf75489ef93beb65209aeb", size = 11086295, upload-time = "2025-07-31T07:53:28.124Z" }, + { url = "https://files.pythonhosted.org/packages/14/1f/19e7e44b594d4b12f6ba8064dbe136505cec813549ca3e5191e40b1d3cc2/mypy-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b16708a66d38abb1e6b5702f5c2c87e133289da36f6a1d15f6a5221085c6403", size = 10112355, upload-time = "2025-07-31T07:53:21.121Z" }, + { url = "https://files.pythonhosted.org/packages/5b/69/baa33927e29e6b4c55d798a9d44db5d394072eef2bdc18c3e2048c9ed1e9/mypy-1.17.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:89e972c0035e9e05823907ad5398c5a73b9f47a002b22359b177d40bdaee7056", size = 11875285, upload-time = "2025-07-31T07:53:55.293Z" }, + { url = "https://files.pythonhosted.org/packages/90/13/f3a89c76b0a41e19490b01e7069713a30949d9a6c147289ee1521bcea245/mypy-1.17.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:03b6d0ed2b188e35ee6d5c36b5580cffd6da23319991c49ab5556c023ccf1341", size = 12737895, upload-time = "2025-07-31T07:53:43.623Z" }, + { url = "https://files.pythonhosted.org/packages/23/a1/c4ee79ac484241301564072e6476c5a5be2590bc2e7bfd28220033d2ef8f/mypy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c837b896b37cd103570d776bda106eabb8737aa6dd4f248451aecf53030cdbeb", size = 12931025, upload-time = "2025-07-31T07:54:17.125Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/b8/7409477be7919a0608900e6320b155c72caab4fef46427c5cc75f85edadd/mypy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:665afab0963a4b39dff7c1fa563cc8b11ecff7910206db4b2e64dd1ba25aed19", size = 9584664, upload-time = "2025-07-31T07:54:12.842Z" }, + { url = "https://files.pythonhosted.org/packages/5b/82/aec2fc9b9b149f372850291827537a508d6c4d3664b1750a324b91f71355/mypy-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:93378d3203a5c0800c6b6d850ad2f19f7a3cdf1a3701d3416dbf128805c6a6a7", size = 11075338, upload-time = "2025-07-31T07:53:38.873Z" }, + { url = "https://files.pythonhosted.org/packages/07/ac/ee93fbde9d2242657128af8c86f5d917cd2887584cf948a8e3663d0cd737/mypy-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:15d54056f7fe7a826d897789f53dd6377ec2ea8ba6f776dc83c2902b899fee81", size = 10113066, upload-time = "2025-07-31T07:54:14.707Z" }, + { url = "https://files.pythonhosted.org/packages/5a/68/946a1e0be93f17f7caa56c45844ec691ca153ee8b62f21eddda336a2d203/mypy-1.17.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:209a58fed9987eccc20f2ca94afe7257a8f46eb5df1fb69958650973230f91e6", size = 11875473, upload-time = "2025-07-31T07:53:14.504Z" }, + { url = "https://files.pythonhosted.org/packages/9f/0f/478b4dce1cb4f43cf0f0d00fba3030b21ca04a01b74d1cd272a528cf446f/mypy-1.17.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:099b9a5da47de9e2cb5165e581f158e854d9e19d2e96b6698c0d64de911dd849", size = 12744296, upload-time = "2025-07-31T07:53:03.896Z" }, + { url = "https://files.pythonhosted.org/packages/ca/70/afa5850176379d1b303f992a828de95fc14487429a7139a4e0bdd17a8279/mypy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa6ffadfbe6994d724c5a1bb6123a7d27dd68fc9c059561cd33b664a79578e14", size = 12914657, upload-time = "2025-07-31T07:54:08.576Z" }, + { url = 
"https://files.pythonhosted.org/packages/53/f9/4a83e1c856a3d9c8f6edaa4749a4864ee98486e9b9dbfbc93842891029c2/mypy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:9a2b7d9180aed171f033c9f2fc6c204c1245cf60b0cb61cf2e7acc24eea78e0a", size = 9593320, upload-time = "2025-07-31T07:53:01.341Z" }, + { url = "https://files.pythonhosted.org/packages/38/56/79c2fac86da57c7d8c48622a05873eaab40b905096c33597462713f5af90/mypy-1.17.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:15a83369400454c41ed3a118e0cc58bd8123921a602f385cb6d6ea5df050c733", size = 11040037, upload-time = "2025-07-31T07:54:10.942Z" }, + { url = "https://files.pythonhosted.org/packages/4d/c3/adabe6ff53638e3cad19e3547268482408323b1e68bf082c9119000cd049/mypy-1.17.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:55b918670f692fc9fba55c3298d8a3beae295c5cded0a55dccdc5bbead814acd", size = 10131550, upload-time = "2025-07-31T07:53:41.307Z" }, + { url = "https://files.pythonhosted.org/packages/b8/c5/2e234c22c3bdeb23a7817af57a58865a39753bde52c74e2c661ee0cfc640/mypy-1.17.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:62761474061feef6f720149d7ba876122007ddc64adff5ba6f374fda35a018a0", size = 11872963, upload-time = "2025-07-31T07:53:16.878Z" }, + { url = "https://files.pythonhosted.org/packages/ab/26/c13c130f35ca8caa5f2ceab68a247775648fdcd6c9a18f158825f2bc2410/mypy-1.17.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c49562d3d908fd49ed0938e5423daed8d407774a479b595b143a3d7f87cdae6a", size = 12710189, upload-time = "2025-07-31T07:54:01.962Z" }, + { url = "https://files.pythonhosted.org/packages/82/df/c7d79d09f6de8383fe800521d066d877e54d30b4fb94281c262be2df84ef/mypy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:397fba5d7616a5bc60b45c7ed204717eaddc38f826e3645402c426057ead9a91", size = 12900322, upload-time = "2025-07-31T07:53:10.551Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/98/3d5a48978b4f708c55ae832619addc66d677f6dc59f3ebad71bae8285ca6/mypy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:9d6b20b97d373f41617bd0708fd46aa656059af57f2ef72aa8c7d6a2b73b74ed", size = 9751879, upload-time = "2025-07-31T07:52:56.683Z" }, + { url = "https://files.pythonhosted.org/packages/29/cb/673e3d34e5d8de60b3a61f44f80150a738bff568cd6b7efb55742a605e98/mypy-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5d1092694f166a7e56c805caaf794e0585cabdbf1df36911c414e4e9abb62ae9", size = 10992466, upload-time = "2025-07-31T07:53:57.574Z" }, + { url = "https://files.pythonhosted.org/packages/0c/d0/fe1895836eea3a33ab801561987a10569df92f2d3d4715abf2cfeaa29cb2/mypy-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:79d44f9bfb004941ebb0abe8eff6504223a9c1ac51ef967d1263c6572bbebc99", size = 10117638, upload-time = "2025-07-31T07:53:34.256Z" }, + { url = "https://files.pythonhosted.org/packages/97/f3/514aa5532303aafb95b9ca400a31054a2bd9489de166558c2baaeea9c522/mypy-1.17.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b01586eed696ec905e61bd2568f48740f7ac4a45b3a468e6423a03d3788a51a8", size = 11915673, upload-time = "2025-07-31T07:52:59.361Z" }, + { url = "https://files.pythonhosted.org/packages/ab/c3/c0805f0edec96fe8e2c048b03769a6291523d509be8ee7f56ae922fa3882/mypy-1.17.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43808d9476c36b927fbcd0b0255ce75efe1b68a080154a38ae68a7e62de8f0f8", size = 12649022, upload-time = "2025-07-31T07:53:45.92Z" }, + { url = "https://files.pythonhosted.org/packages/45/3e/d646b5a298ada21a8512fa7e5531f664535a495efa672601702398cea2b4/mypy-1.17.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:feb8cc32d319edd5859da2cc084493b3e2ce5e49a946377663cc90f6c15fb259", size = 12895536, upload-time = "2025-07-31T07:53:06.17Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/55/e13d0dcd276975927d1f4e9e2ec4fd409e199f01bdc671717e673cc63a22/mypy-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d7598cf74c3e16539d4e2f0b8d8c318e00041553d83d4861f87c7a72e95ac24d", size = 9512564, upload-time = "2025-07-31T07:53:12.346Z" }, + { url = "https://files.pythonhosted.org/packages/1d/f3/8fcd2af0f5b806f6cf463efaffd3c9548a28f84220493ecd38d127b6b66d/mypy-1.17.1-py3-none-any.whl", hash = "sha256:a9f52c0351c21fe24c21d8c0eb1f62967b262d6729393397b6f443c3b773c3b9", size = 2283411, upload-time = "2025-07-31T07:53:24.664Z" }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + +[[package]] +name = "numpy" +version = "1.24.4" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/a4/9b/027bec52c633f6556dba6b722d9a0befb40498b9ceddd29cbe67a45a127c/numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463", size = 10911229, upload-time = "2023-06-26T13:39:33.218Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/80/6cdfb3e275d95155a34659163b83c09e3a3ff9f1456880bec6cc63d71083/numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64", size = 19789140, upload-time = "2023-06-26T13:22:33.184Z" }, + { url = "https://files.pythonhosted.org/packages/64/5f/3f01d753e2175cfade1013eea08db99ba1ee4bdb147ebcf3623b75d12aa7/numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1", size = 13854297, upload-time = "2023-06-26T13:22:59.541Z" }, + { url = "https://files.pythonhosted.org/packages/5a/b3/2f9c21d799fa07053ffa151faccdceeb69beec5a010576b8991f614021f7/numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4", size = 13995611, upload-time = "2023-06-26T13:23:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/10/be/ae5bf4737cb79ba437879915791f6f26d92583c738d7d960ad94e5c36adf/numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6", size = 17282357, upload-time = "2023-06-26T13:23:51.446Z" }, + { url = "https://files.pythonhosted.org/packages/c0/64/908c1087be6285f40e4b3e79454552a701664a079321cff519d8c7051d06/numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc", size = 12429222, upload-time = "2023-06-26T13:24:13.849Z" }, + { url = "https://files.pythonhosted.org/packages/22/55/3d5a7c1142e0d9329ad27cece17933b0e2ab4e54ddc5c1861fbfeb3f7693/numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e", size = 14841514, upload-time = "2023-06-26T13:24:38.129Z" }, + { url = "https://files.pythonhosted.org/packages/a9/cc/5ed2280a27e5dab12994c884f1f4d8c3bd4d885d02ae9e52a9d213a6a5e2/numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810", size = 19775508, upload-time = 
"2023-06-26T13:25:08.882Z" }, + { url = "https://files.pythonhosted.org/packages/c0/bc/77635c657a3668cf652806210b8662e1aff84b818a55ba88257abf6637a8/numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254", size = 13840033, upload-time = "2023-06-26T13:25:33.417Z" }, + { url = "https://files.pythonhosted.org/packages/a7/4c/96cdaa34f54c05e97c1c50f39f98d608f96f0677a6589e64e53104e22904/numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7", size = 13991951, upload-time = "2023-06-26T13:25:55.725Z" }, + { url = "https://files.pythonhosted.org/packages/22/97/dfb1a31bb46686f09e68ea6ac5c63fdee0d22d7b23b8f3f7ea07712869ef/numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5", size = 17278923, upload-time = "2023-06-26T13:26:25.658Z" }, + { url = "https://files.pythonhosted.org/packages/35/e2/76a11e54139654a324d107da1d98f99e7aa2a7ef97cfd7c631fba7dbde71/numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d", size = 12422446, upload-time = "2023-06-26T13:26:49.302Z" }, + { url = "https://files.pythonhosted.org/packages/d8/ec/ebef2f7d7c28503f958f0f8b992e7ce606fb74f9e891199329d5f5f87404/numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694", size = 14834466, upload-time = "2023-06-26T13:27:16.029Z" }, + { url = "https://files.pythonhosted.org/packages/11/10/943cfb579f1a02909ff96464c69893b1d25be3731b5d3652c2e0cf1281ea/numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61", size = 19780722, upload-time = "2023-06-26T13:27:49.573Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/ae/f53b7b265fdc701e663fbb322a8e9d4b14d9cb7b2385f45ddfabfc4327e4/numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f", size = 13843102, upload-time = "2023-06-26T13:28:12.288Z" }, + { url = "https://files.pythonhosted.org/packages/25/6f/2586a50ad72e8dbb1d8381f837008a0321a3516dfd7cb57fc8cf7e4bb06b/numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e", size = 14039616, upload-time = "2023-06-26T13:28:35.659Z" }, + { url = "https://files.pythonhosted.org/packages/98/5d/5738903efe0ecb73e51eb44feafba32bdba2081263d40c5043568ff60faf/numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc", size = 17316263, upload-time = "2023-06-26T13:29:09.272Z" }, + { url = "https://files.pythonhosted.org/packages/d1/57/8d328f0b91c733aa9aa7ee540dbc49b58796c862b4fbcb1146c701e888da/numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2", size = 12455660, upload-time = "2023-06-26T13:29:33.434Z" }, + { url = "https://files.pythonhosted.org/packages/69/65/0d47953afa0ad569d12de5f65d964321c208492064c38fe3b0b9744f8d44/numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706", size = 14868112, upload-time = "2023-06-26T13:29:58.385Z" }, + { url = "https://files.pythonhosted.org/packages/9a/cd/d5b0402b801c8a8b56b04c1e85c6165efab298d2f0ab741c2406516ede3a/numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400", size = 19816549, upload-time = "2023-06-26T13:30:36.976Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/27/638aaa446f39113a3ed38b37a66243e21b38110d021bfcb940c383e120f2/numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f", size = 13879950, upload-time = "2023-06-26T13:31:01.787Z" }, + { url = "https://files.pythonhosted.org/packages/8f/27/91894916e50627476cff1a4e4363ab6179d01077d71b9afed41d9e1f18bf/numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9", size = 14030228, upload-time = "2023-06-26T13:31:26.696Z" }, + { url = "https://files.pythonhosted.org/packages/7a/7c/d7b2a0417af6428440c0ad7cb9799073e507b1a465f827d058b826236964/numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d", size = 17311170, upload-time = "2023-06-26T13:31:56.615Z" }, + { url = "https://files.pythonhosted.org/packages/18/9d/e02ace5d7dfccee796c37b995c63322674daf88ae2f4a4724c5dd0afcc91/numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835", size = 12454918, upload-time = "2023-06-26T13:32:16.8Z" }, + { url = "https://files.pythonhosted.org/packages/63/38/6cc19d6b8bfa1d1a459daf2b3fe325453153ca7019976274b6f33d8b5663/numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8", size = 14867441, upload-time = "2023-06-26T13:32:40.521Z" }, + { url = "https://files.pythonhosted.org/packages/a4/fd/8dff40e25e937c94257455c237b9b6bf5a30d42dd1cc11555533be099492/numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef", size = 19156590, upload-time = "2023-06-26T13:33:10.36Z" }, + { url = 
"https://files.pythonhosted.org/packages/42/e7/4bf953c6e05df90c6d351af69966384fed8e988d0e8c54dad7103b59f3ba/numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a", size = 16705744, upload-time = "2023-06-26T13:33:36.703Z" }, + { url = "https://files.pythonhosted.org/packages/fc/dd/9106005eb477d022b60b3817ed5937a43dad8fd1f20b0610ea8a32fcb407/numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2", size = 14734290, upload-time = "2023-06-26T13:34:05.409Z" }, +] + +[[package]] +name = "numpy" +version = "2.0.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/75/10dd1f8116a8b796cb2c737b674e02d02e80454bda953fa7e65d8c12b016/numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78", size = 18902015, upload-time = "2024-08-26T20:19:40.945Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/21/91/3495b3237510f79f5d81f2508f9f13fea78ebfdf07538fc7444badda173d/numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece", size = 21165245, upload-time = "2024-08-26T20:04:14.625Z" }, + { url = "https://files.pythonhosted.org/packages/05/33/26178c7d437a87082d11019292dce6d3fe6f0e9026b7b2309cbf3e489b1d/numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04", size = 13738540, upload-time = "2024-08-26T20:04:36.784Z" }, + { url = "https://files.pythonhosted.org/packages/ec/31/cc46e13bf07644efc7a4bf68df2df5fb2a1a88d0cd0da9ddc84dc0033e51/numpy-2.0.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8c5713284ce4e282544c68d1c3b2c7161d38c256d2eefc93c1d683cf47683e66", size = 5300623, upload-time 
= "2024-08-26T20:04:46.491Z" }, + { url = "https://files.pythonhosted.org/packages/6e/16/7bfcebf27bb4f9d7ec67332ffebee4d1bf085c84246552d52dbb548600e7/numpy-2.0.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:becfae3ddd30736fe1889a37f1f580e245ba79a5855bff5f2a29cb3ccc22dd7b", size = 6901774, upload-time = "2024-08-26T20:04:58.173Z" }, + { url = "https://files.pythonhosted.org/packages/f9/a3/561c531c0e8bf082c5bef509d00d56f82e0ea7e1e3e3a7fc8fa78742a6e5/numpy-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2da5960c3cf0df7eafefd806d4e612c5e19358de82cb3c343631188991566ccd", size = 13907081, upload-time = "2024-08-26T20:05:19.098Z" }, + { url = "https://files.pythonhosted.org/packages/fa/66/f7177ab331876200ac7563a580140643d1179c8b4b6a6b0fc9838de2a9b8/numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:496f71341824ed9f3d2fd36cf3ac57ae2e0165c143b55c3a035ee219413f3318", size = 19523451, upload-time = "2024-08-26T20:05:47.479Z" }, + { url = "https://files.pythonhosted.org/packages/25/7f/0b209498009ad6453e4efc2c65bcdf0ae08a182b2b7877d7ab38a92dc542/numpy-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a61ec659f68ae254e4d237816e33171497e978140353c0c2038d46e63282d0c8", size = 19927572, upload-time = "2024-08-26T20:06:17.137Z" }, + { url = "https://files.pythonhosted.org/packages/3e/df/2619393b1e1b565cd2d4c4403bdd979621e2c4dea1f8532754b2598ed63b/numpy-2.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d731a1c6116ba289c1e9ee714b08a8ff882944d4ad631fd411106a30f083c326", size = 14400722, upload-time = "2024-08-26T20:06:39.16Z" }, + { url = "https://files.pythonhosted.org/packages/22/ad/77e921b9f256d5da36424ffb711ae79ca3f451ff8489eeca544d0701d74a/numpy-2.0.2-cp310-cp310-win32.whl", hash = "sha256:984d96121c9f9616cd33fbd0618b7f08e0cfc9600a7ee1d6fd9b239186d19d97", size = 6472170, upload-time = "2024-08-26T20:06:50.361Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/05/3442317535028bc29cf0c0dd4c191a4481e8376e9f0db6bcf29703cadae6/numpy-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:c7b0be4ef08607dd04da4092faee0b86607f111d5ae68036f16cc787e250a131", size = 15905558, upload-time = "2024-08-26T20:07:13.881Z" }, + { url = "https://files.pythonhosted.org/packages/8b/cf/034500fb83041aa0286e0fb16e7c76e5c8b67c0711bb6e9e9737a717d5fe/numpy-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:49ca4decb342d66018b01932139c0961a8f9ddc7589611158cb3c27cbcf76448", size = 21169137, upload-time = "2024-08-26T20:07:45.345Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d9/32de45561811a4b87fbdee23b5797394e3d1504b4a7cf40c10199848893e/numpy-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195", size = 13703552, upload-time = "2024-08-26T20:08:06.666Z" }, + { url = "https://files.pythonhosted.org/packages/c1/ca/2f384720020c7b244d22508cb7ab23d95f179fcfff33c31a6eeba8d6c512/numpy-2.0.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:807ec44583fd708a21d4a11d94aedf2f4f3c3719035c76a2bbe1fe8e217bdc57", size = 5298957, upload-time = "2024-08-26T20:08:15.83Z" }, + { url = "https://files.pythonhosted.org/packages/0e/78/a3e4f9fb6aa4e6fdca0c5428e8ba039408514388cf62d89651aade838269/numpy-2.0.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8cafab480740e22f8d833acefed5cc87ce276f4ece12fdaa2e8903db2f82897a", size = 6905573, upload-time = "2024-08-26T20:08:27.185Z" }, + { url = "https://files.pythonhosted.org/packages/a0/72/cfc3a1beb2caf4efc9d0b38a15fe34025230da27e1c08cc2eb9bfb1c7231/numpy-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15f476a45e6e5a3a79d8a14e62161d27ad897381fecfa4a09ed5322f2085669", size = 13914330, upload-time = "2024-08-26T20:08:48.058Z" }, + { url = 
"https://files.pythonhosted.org/packages/ba/a8/c17acf65a931ce551fee11b72e8de63bf7e8a6f0e21add4c937c83563538/numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e689d772146140a252c3a28501da66dfecd77490b498b168b501835041f951", size = 19534895, upload-time = "2024-08-26T20:09:16.536Z" }, + { url = "https://files.pythonhosted.org/packages/ba/86/8767f3d54f6ae0165749f84648da9dcc8cd78ab65d415494962c86fac80f/numpy-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9ea91dfb7c3d1c56a0e55657c0afb38cf1eeae4544c208dc465c3c9f3a7c09f9", size = 19937253, upload-time = "2024-08-26T20:09:46.263Z" }, + { url = "https://files.pythonhosted.org/packages/df/87/f76450e6e1c14e5bb1eae6836478b1028e096fd02e85c1c37674606ab752/numpy-2.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c1c9307701fec8f3f7a1e6711f9089c06e6284b3afbbcd259f7791282d660a15", size = 14414074, upload-time = "2024-08-26T20:10:08.483Z" }, + { url = "https://files.pythonhosted.org/packages/5c/ca/0f0f328e1e59f73754f06e1adfb909de43726d4f24c6a3f8805f34f2b0fa/numpy-2.0.2-cp311-cp311-win32.whl", hash = "sha256:a392a68bd329eafac5817e5aefeb39038c48b671afd242710b451e76090e81f4", size = 6470640, upload-time = "2024-08-26T20:10:19.732Z" }, + { url = "https://files.pythonhosted.org/packages/eb/57/3a3f14d3a759dcf9bf6e9eda905794726b758819df4663f217d658a58695/numpy-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:286cd40ce2b7d652a6f22efdfc6d1edf879440e53e76a75955bc0c826c7e64dc", size = 15910230, upload-time = "2024-08-26T20:10:43.413Z" }, + { url = "https://files.pythonhosted.org/packages/45/40/2e117be60ec50d98fa08c2f8c48e09b3edea93cfcabd5a9ff6925d54b1c2/numpy-2.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:df55d490dea7934f330006d0f81e8551ba6010a5bf035a249ef61a94f21c500b", size = 20895803, upload-time = "2024-08-26T20:11:13.916Z" }, + { url = 
"https://files.pythonhosted.org/packages/46/92/1b8b8dee833f53cef3e0a3f69b2374467789e0bb7399689582314df02651/numpy-2.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8df823f570d9adf0978347d1f926b2a867d5608f434a7cff7f7908c6570dcf5e", size = 13471835, upload-time = "2024-08-26T20:11:34.779Z" }, + { url = "https://files.pythonhosted.org/packages/7f/19/e2793bde475f1edaea6945be141aef6c8b4c669b90c90a300a8954d08f0a/numpy-2.0.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9a92ae5c14811e390f3767053ff54eaee3bf84576d99a2456391401323f4ec2c", size = 5038499, upload-time = "2024-08-26T20:11:43.902Z" }, + { url = "https://files.pythonhosted.org/packages/e3/ff/ddf6dac2ff0dd50a7327bcdba45cb0264d0e96bb44d33324853f781a8f3c/numpy-2.0.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a842d573724391493a97a62ebbb8e731f8a5dcc5d285dfc99141ca15a3302d0c", size = 6633497, upload-time = "2024-08-26T20:11:55.09Z" }, + { url = "https://files.pythonhosted.org/packages/72/21/67f36eac8e2d2cd652a2e69595a54128297cdcb1ff3931cfc87838874bd4/numpy-2.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05e238064fc0610c840d1cf6a13bf63d7e391717d247f1bf0318172e759e692", size = 13621158, upload-time = "2024-08-26T20:12:14.95Z" }, + { url = "https://files.pythonhosted.org/packages/39/68/e9f1126d757653496dbc096cb429014347a36b228f5a991dae2c6b6cfd40/numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a", size = 19236173, upload-time = "2024-08-26T20:12:44.049Z" }, + { url = "https://files.pythonhosted.org/packages/d1/e9/1f5333281e4ebf483ba1c888b1d61ba7e78d7e910fdd8e6499667041cc35/numpy-2.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:96a55f64139912d61de9137f11bf39a55ec8faec288c75a54f93dfd39f7eb40c", size = 19634174, upload-time = "2024-08-26T20:13:13.634Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/af/a469674070c8d8408384e3012e064299f7a2de540738a8e414dcfd639996/numpy-2.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec9852fb39354b5a45a80bdab5ac02dd02b15f44b3804e9f00c556bf24b4bded", size = 14099701, upload-time = "2024-08-26T20:13:34.851Z" }, + { url = "https://files.pythonhosted.org/packages/d0/3d/08ea9f239d0e0e939b6ca52ad403c84a2bce1bde301a8eb4888c1c1543f1/numpy-2.0.2-cp312-cp312-win32.whl", hash = "sha256:671bec6496f83202ed2d3c8fdc486a8fc86942f2e69ff0e986140339a63bcbe5", size = 6174313, upload-time = "2024-08-26T20:13:45.653Z" }, + { url = "https://files.pythonhosted.org/packages/b2/b5/4ac39baebf1fdb2e72585c8352c56d063b6126be9fc95bd2bb5ef5770c20/numpy-2.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:cfd41e13fdc257aa5778496b8caa5e856dc4896d4ccf01841daee1d96465467a", size = 15606179, upload-time = "2024-08-26T20:14:08.786Z" }, + { url = "https://files.pythonhosted.org/packages/43/c1/41c8f6df3162b0c6ffd4437d729115704bd43363de0090c7f913cfbc2d89/numpy-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9059e10581ce4093f735ed23f3b9d283b9d517ff46009ddd485f1747eb22653c", size = 21169942, upload-time = "2024-08-26T20:14:40.108Z" }, + { url = "https://files.pythonhosted.org/packages/39/bc/fd298f308dcd232b56a4031fd6ddf11c43f9917fbc937e53762f7b5a3bb1/numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:423e89b23490805d2a5a96fe40ec507407b8ee786d66f7328be214f9679df6dd", size = 13711512, upload-time = "2024-08-26T20:15:00.985Z" }, + { url = "https://files.pythonhosted.org/packages/96/ff/06d1aa3eeb1c614eda245c1ba4fb88c483bee6520d361641331872ac4b82/numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:2b2955fa6f11907cf7a70dab0d0755159bca87755e831e47932367fc8f2f2d0b", size = 5306976, upload-time = "2024-08-26T20:15:10.876Z" }, + { url = "https://files.pythonhosted.org/packages/2d/98/121996dcfb10a6087a05e54453e28e58694a7db62c5a5a29cee14c6e047b/numpy-2.0.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = 
"sha256:97032a27bd9d8988b9a97a8c4d2c9f2c15a81f61e2f21404d7e8ef00cb5be729", size = 6906494, upload-time = "2024-08-26T20:15:22.055Z" }, + { url = "https://files.pythonhosted.org/packages/15/31/9dffc70da6b9bbf7968f6551967fc21156207366272c2a40b4ed6008dc9b/numpy-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e795a8be3ddbac43274f18588329c72939870a16cae810c2b73461c40718ab1", size = 13912596, upload-time = "2024-08-26T20:15:42.452Z" }, + { url = "https://files.pythonhosted.org/packages/b9/14/78635daab4b07c0930c919d451b8bf8c164774e6a3413aed04a6d95758ce/numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b258c385842546006213344c50655ff1555a9338e2e5e02a0756dc3e803dd", size = 19526099, upload-time = "2024-08-26T20:16:11.048Z" }, + { url = "https://files.pythonhosted.org/packages/26/4c/0eeca4614003077f68bfe7aac8b7496f04221865b3a5e7cb230c9d055afd/numpy-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fec9451a7789926bcf7c2b8d187292c9f93ea30284802a0ab3f5be8ab36865d", size = 19932823, upload-time = "2024-08-26T20:16:40.171Z" }, + { url = "https://files.pythonhosted.org/packages/f1/46/ea25b98b13dccaebddf1a803f8c748680d972e00507cd9bc6dcdb5aa2ac1/numpy-2.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9189427407d88ff25ecf8f12469d4d39d35bee1db5d39fc5c168c6f088a6956d", size = 14404424, upload-time = "2024-08-26T20:17:02.604Z" }, + { url = "https://files.pythonhosted.org/packages/c8/a6/177dd88d95ecf07e722d21008b1b40e681a929eb9e329684d449c36586b2/numpy-2.0.2-cp39-cp39-win32.whl", hash = "sha256:905d16e0c60200656500c95b6b8dca5d109e23cb24abc701d41c02d74c6b3afa", size = 6476809, upload-time = "2024-08-26T20:17:13.553Z" }, + { url = "https://files.pythonhosted.org/packages/ea/2b/7fc9f4e7ae5b507c1a3a21f0f15ed03e794c1242ea8a242ac158beb56034/numpy-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:a3f4ab0caa7f053f6797fcd4e1e25caee367db3112ef2b6ef82d749530768c73", size = 15911314, upload-time = 
"2024-08-26T20:17:36.72Z" }, + { url = "https://files.pythonhosted.org/packages/8f/3b/df5a870ac6a3be3a86856ce195ef42eec7ae50d2a202be1f5a4b3b340e14/numpy-2.0.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f0a0c6f12e07fa94133c8a67404322845220c06a9e80e85999afe727f7438b8", size = 21025288, upload-time = "2024-08-26T20:18:07.732Z" }, + { url = "https://files.pythonhosted.org/packages/2c/97/51af92f18d6f6f2d9ad8b482a99fb74e142d71372da5d834b3a2747a446e/numpy-2.0.2-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:312950fdd060354350ed123c0e25a71327d3711584beaef30cdaa93320c392d4", size = 6762793, upload-time = "2024-08-26T20:18:19.125Z" }, + { url = "https://files.pythonhosted.org/packages/12/46/de1fbd0c1b5ccaa7f9a005b66761533e2f6a3e560096682683a223631fe9/numpy-2.0.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26df23238872200f63518dd2aa984cfca675d82469535dc7162dc2ee52d9dd5c", size = 19334885, upload-time = "2024-08-26T20:18:47.237Z" }, + { url = "https://files.pythonhosted.org/packages/cc/dc/d330a6faefd92b446ec0f0dfea4c3207bb1fef3c4771d19cf4543efd2c78/numpy-2.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a46288ec55ebbd58947d31d72be2c63cbf839f0a63b49cb755022310792a3385", size = 15828784, upload-time = "2024-08-26T20:19:11.19Z" }, +] + +[[package]] +name = "numpy" +version = "2.2.6" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.10.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/3e/ed6db5be21ce87955c0cbd3009f2803f59fa08df21b5df06862e2d8e2bdd/numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb", size = 21165245, upload-time = "2025-05-17T21:27:58.555Z" }, + { url = "https://files.pythonhosted.org/packages/22/c2/4b9221495b2a132cc9d2eb862e21d42a009f5a60e45fc44b00118c174bff/numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90", size = 14360048, upload-time = "2025-05-17T21:28:21.406Z" }, + { url = "https://files.pythonhosted.org/packages/fd/77/dc2fcfc66943c6410e2bf598062f5959372735ffda175b39906d54f02349/numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163", size = 5340542, upload-time = "2025-05-17T21:28:30.931Z" }, + { url = "https://files.pythonhosted.org/packages/7a/4f/1cb5fdc353a5f5cc7feb692db9b8ec2c3d6405453f982435efc52561df58/numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf", size = 6878301, upload-time = "2025-05-17T21:28:41.613Z" }, + { url = "https://files.pythonhosted.org/packages/eb/17/96a3acd228cec142fcb8723bd3cc39c2a474f7dcf0a5d16731980bcafa95/numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83", size = 14297320, upload-time = "2025-05-17T21:29:02.78Z" }, + { url = "https://files.pythonhosted.org/packages/b4/63/3de6a34ad7ad6646ac7d2f55ebc6ad439dbbf9c4370017c50cf403fb19b5/numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915", size = 16801050, upload-time = "2025-05-17T21:29:27.675Z" }, + { url = "https://files.pythonhosted.org/packages/07/b6/89d837eddef52b3d0cec5c6ba0456c1bf1b9ef6a6672fc2b7873c3ec4e2e/numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680", size = 15807034, 
upload-time = "2025-05-17T21:29:51.102Z" }, + { url = "https://files.pythonhosted.org/packages/01/c8/dc6ae86e3c61cfec1f178e5c9f7858584049b6093f843bca541f94120920/numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289", size = 18614185, upload-time = "2025-05-17T21:30:18.703Z" }, + { url = "https://files.pythonhosted.org/packages/5b/c5/0064b1b7e7c89137b471ccec1fd2282fceaae0ab3a9550f2568782d80357/numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d", size = 6527149, upload-time = "2025-05-17T21:30:29.788Z" }, + { url = "https://files.pythonhosted.org/packages/a3/dd/4b822569d6b96c39d1215dbae0582fd99954dcbcf0c1a13c61783feaca3f/numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3", size = 12904620, upload-time = "2025-05-17T21:30:48.994Z" }, + { url = "https://files.pythonhosted.org/packages/da/a8/4f83e2aa666a9fbf56d6118faaaf5f1974d456b1823fda0a176eff722839/numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae", size = 21176963, upload-time = "2025-05-17T21:31:19.36Z" }, + { url = "https://files.pythonhosted.org/packages/b3/2b/64e1affc7972decb74c9e29e5649fac940514910960ba25cd9af4488b66c/numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a", size = 14406743, upload-time = "2025-05-17T21:31:41.087Z" }, + { url = "https://files.pythonhosted.org/packages/4a/9f/0121e375000b5e50ffdd8b25bf78d8e1a5aa4cca3f185d41265198c7b834/numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42", size = 5352616, upload-time = "2025-05-17T21:31:50.072Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/0d/b48c405c91693635fbe2dcd7bc84a33a602add5f63286e024d3b6741411c/numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491", size = 6889579, upload-time = "2025-05-17T21:32:01.712Z" }, + { url = "https://files.pythonhosted.org/packages/52/b8/7f0554d49b565d0171eab6e99001846882000883998e7b7d9f0d98b1f934/numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a", size = 14312005, upload-time = "2025-05-17T21:32:23.332Z" }, + { url = "https://files.pythonhosted.org/packages/b3/dd/2238b898e51bd6d389b7389ffb20d7f4c10066d80351187ec8e303a5a475/numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf", size = 16821570, upload-time = "2025-05-17T21:32:47.991Z" }, + { url = "https://files.pythonhosted.org/packages/83/6c/44d0325722cf644f191042bf47eedad61c1e6df2432ed65cbe28509d404e/numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1", size = 15818548, upload-time = "2025-05-17T21:33:11.728Z" }, + { url = "https://files.pythonhosted.org/packages/ae/9d/81e8216030ce66be25279098789b665d49ff19eef08bfa8cb96d4957f422/numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab", size = 18620521, upload-time = "2025-05-17T21:33:39.139Z" }, + { url = "https://files.pythonhosted.org/packages/6a/fd/e19617b9530b031db51b0926eed5345ce8ddc669bb3bc0044b23e275ebe8/numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47", size = 6525866, upload-time = "2025-05-17T21:33:50.273Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/0a/f354fb7176b81747d870f7991dc763e157a934c717b67b58456bc63da3df/numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303", size = 12907455, upload-time = "2025-05-17T21:34:09.135Z" }, + { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" }, + { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" }, + { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" }, + { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" }, + { url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511, upload-time = "2025-05-17T21:36:29.965Z" }, + { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" }, + { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" }, + { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" }, + { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, + { url = 
"https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, + { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, + { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" }, + { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" }, + { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" }, + { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, upload-time = "2025-05-17T21:43:46.099Z" }, + { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = "2025-05-17T21:44:05.145Z" }, + { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" }, + { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" }, + { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash 
= "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" }, + { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" }, + { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" }, + { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" }, + { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" }, + { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225, upload-time = "2025-05-17T21:43:16.254Z" }, + { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, 
upload-time = "2025-05-17T21:43:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/9e/3b/d94a75f4dbf1ef5d321523ecac21ef23a3cd2ac8b78ae2aac40873590229/numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d", size = 21040391, upload-time = "2025-05-17T21:44:35.948Z" }, + { url = "https://files.pythonhosted.org/packages/17/f4/09b2fa1b58f0fb4f7c7963a1649c64c4d315752240377ed74d9cd878f7b5/numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db", size = 6786754, upload-time = "2025-05-17T21:44:47.446Z" }, + { url = "https://files.pythonhosted.org/packages/af/30/feba75f143bdc868a1cc3f44ccfa6c4b9ec522b36458e738cd00f67b573f/numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543", size = 16643476, upload-time = "2025-05-17T21:45:11.871Z" }, + { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload-time = "2025-05-17T21:45:31.426Z" }, +] + +[[package]] +name = "numpy" +version = "2.3.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/37/7d/3fec4199c5ffb892bed55cff901e4f39a58c81df9c44c280499e92cad264/numpy-2.3.2.tar.gz", hash = "sha256:e0486a11ec30cdecb53f184d496d1c6a20786c81e55e41640270130056f8ee48", size = 20489306, upload-time = "2025-07-24T21:32:07.553Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/26/1320083986108998bd487e2931eed2aeedf914b6e8905431487543ec911d/numpy-2.3.2-cp311-cp311-macosx_10_9_x86_64.whl", 
hash = "sha256:852ae5bed3478b92f093e30f785c98e0cb62fa0a939ed057c31716e18a7a22b9", size = 21259016, upload-time = "2025-07-24T20:24:35.214Z" }, + { url = "https://files.pythonhosted.org/packages/c4/2b/792b341463fa93fc7e55abbdbe87dac316c5b8cb5e94fb7a59fb6fa0cda5/numpy-2.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7a0e27186e781a69959d0230dd9909b5e26024f8da10683bd6344baea1885168", size = 14451158, upload-time = "2025-07-24T20:24:58.397Z" }, + { url = "https://files.pythonhosted.org/packages/b7/13/e792d7209261afb0c9f4759ffef6135b35c77c6349a151f488f531d13595/numpy-2.3.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:f0a1a8476ad77a228e41619af2fa9505cf69df928e9aaa165746584ea17fed2b", size = 5379817, upload-time = "2025-07-24T20:25:07.746Z" }, + { url = "https://files.pythonhosted.org/packages/49/ce/055274fcba4107c022b2113a213c7287346563f48d62e8d2a5176ad93217/numpy-2.3.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:cbc95b3813920145032412f7e33d12080f11dc776262df1712e1638207dde9e8", size = 6913606, upload-time = "2025-07-24T20:25:18.84Z" }, + { url = "https://files.pythonhosted.org/packages/17/f2/e4d72e6bc5ff01e2ab613dc198d560714971900c03674b41947e38606502/numpy-2.3.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f75018be4980a7324edc5930fe39aa391d5734531b1926968605416ff58c332d", size = 14589652, upload-time = "2025-07-24T20:25:40.356Z" }, + { url = "https://files.pythonhosted.org/packages/c8/b0/fbeee3000a51ebf7222016e2939b5c5ecf8000a19555d04a18f1e02521b8/numpy-2.3.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:20b8200721840f5621b7bd03f8dcd78de33ec522fc40dc2641aa09537df010c3", size = 16938816, upload-time = "2025-07-24T20:26:05.721Z" }, + { url = "https://files.pythonhosted.org/packages/a9/ec/2f6c45c3484cc159621ea8fc000ac5a86f1575f090cac78ac27193ce82cd/numpy-2.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1f91e5c028504660d606340a084db4b216567ded1056ea2b4be4f9d10b67197f", size = 
16370512, upload-time = "2025-07-24T20:26:30.545Z" }, + { url = "https://files.pythonhosted.org/packages/b5/01/dd67cf511850bd7aefd6347aaae0956ed415abea741ae107834aae7d6d4e/numpy-2.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:fb1752a3bb9a3ad2d6b090b88a9a0ae1cd6f004ef95f75825e2f382c183b2097", size = 18884947, upload-time = "2025-07-24T20:26:58.24Z" }, + { url = "https://files.pythonhosted.org/packages/a7/17/2cf60fd3e6a61d006778735edf67a222787a8c1a7842aed43ef96d777446/numpy-2.3.2-cp311-cp311-win32.whl", hash = "sha256:4ae6863868aaee2f57503c7a5052b3a2807cf7a3914475e637a0ecd366ced220", size = 6599494, upload-time = "2025-07-24T20:27:09.786Z" }, + { url = "https://files.pythonhosted.org/packages/d5/03/0eade211c504bda872a594f045f98ddcc6caef2b7c63610946845e304d3f/numpy-2.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:240259d6564f1c65424bcd10f435145a7644a65a6811cfc3201c4a429ba79170", size = 13087889, upload-time = "2025-07-24T20:27:29.558Z" }, + { url = "https://files.pythonhosted.org/packages/13/32/2c7979d39dafb2a25087e12310fc7f3b9d3c7d960df4f4bc97955ae0ce1d/numpy-2.3.2-cp311-cp311-win_arm64.whl", hash = "sha256:4209f874d45f921bde2cff1ffcd8a3695f545ad2ffbef6d3d3c6768162efab89", size = 10459560, upload-time = "2025-07-24T20:27:46.803Z" }, + { url = "https://files.pythonhosted.org/packages/00/6d/745dd1c1c5c284d17725e5c802ca4d45cfc6803519d777f087b71c9f4069/numpy-2.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bc3186bea41fae9d8e90c2b4fb5f0a1f5a690682da79b92574d63f56b529080b", size = 20956420, upload-time = "2025-07-24T20:28:18.002Z" }, + { url = "https://files.pythonhosted.org/packages/bc/96/e7b533ea5740641dd62b07a790af5d9d8fec36000b8e2d0472bd7574105f/numpy-2.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2f4f0215edb189048a3c03bd5b19345bdfa7b45a7a6f72ae5945d2a28272727f", size = 14184660, upload-time = "2025-07-24T20:28:39.522Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/53/102c6122db45a62aa20d1b18c9986f67e6b97e0d6fbc1ae13e3e4c84430c/numpy-2.3.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8b1224a734cd509f70816455c3cffe13a4f599b1bf7130f913ba0e2c0b2006c0", size = 5113382, upload-time = "2025-07-24T20:28:48.544Z" }, + { url = "https://files.pythonhosted.org/packages/2b/21/376257efcbf63e624250717e82b4fae93d60178f09eb03ed766dbb48ec9c/numpy-2.3.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:3dcf02866b977a38ba3ec10215220609ab9667378a9e2150615673f3ffd6c73b", size = 6647258, upload-time = "2025-07-24T20:28:59.104Z" }, + { url = "https://files.pythonhosted.org/packages/91/ba/f4ebf257f08affa464fe6036e13f2bf9d4642a40228781dc1235da81be9f/numpy-2.3.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:572d5512df5470f50ada8d1972c5f1082d9a0b7aa5944db8084077570cf98370", size = 14281409, upload-time = "2025-07-24T20:40:30.298Z" }, + { url = "https://files.pythonhosted.org/packages/59/ef/f96536f1df42c668cbacb727a8c6da7afc9c05ece6d558927fb1722693e1/numpy-2.3.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8145dd6d10df13c559d1e4314df29695613575183fa2e2d11fac4c208c8a1f73", size = 16641317, upload-time = "2025-07-24T20:40:56.625Z" }, + { url = "https://files.pythonhosted.org/packages/f6/a7/af813a7b4f9a42f498dde8a4c6fcbff8100eed00182cc91dbaf095645f38/numpy-2.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:103ea7063fa624af04a791c39f97070bf93b96d7af7eb23530cd087dc8dbe9dc", size = 16056262, upload-time = "2025-07-24T20:41:20.797Z" }, + { url = "https://files.pythonhosted.org/packages/8b/5d/41c4ef8404caaa7f05ed1cfb06afe16a25895260eacbd29b4d84dff2920b/numpy-2.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc927d7f289d14f5e037be917539620603294454130b6de200091e23d27dc9be", size = 18579342, upload-time = "2025-07-24T20:41:50.753Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/4f/9950e44c5a11636f4a3af6e825ec23003475cc9a466edb7a759ed3ea63bd/numpy-2.3.2-cp312-cp312-win32.whl", hash = "sha256:d95f59afe7f808c103be692175008bab926b59309ade3e6d25009e9a171f7036", size = 6320610, upload-time = "2025-07-24T20:42:01.551Z" }, + { url = "https://files.pythonhosted.org/packages/7c/2f/244643a5ce54a94f0a9a2ab578189c061e4a87c002e037b0829dd77293b6/numpy-2.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:9e196ade2400c0c737d93465327d1ae7c06c7cb8a1756121ebf54b06ca183c7f", size = 12786292, upload-time = "2025-07-24T20:42:20.738Z" }, + { url = "https://files.pythonhosted.org/packages/54/cd/7b5f49d5d78db7badab22d8323c1b6ae458fbf86c4fdfa194ab3cd4eb39b/numpy-2.3.2-cp312-cp312-win_arm64.whl", hash = "sha256:ee807923782faaf60d0d7331f5e86da7d5e3079e28b291973c545476c2b00d07", size = 10194071, upload-time = "2025-07-24T20:42:36.657Z" }, + { url = "https://files.pythonhosted.org/packages/1c/c0/c6bb172c916b00700ed3bf71cb56175fd1f7dbecebf8353545d0b5519f6c/numpy-2.3.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c8d9727f5316a256425892b043736d63e89ed15bbfe6556c5ff4d9d4448ff3b3", size = 20949074, upload-time = "2025-07-24T20:43:07.813Z" }, + { url = "https://files.pythonhosted.org/packages/20/4e/c116466d22acaf4573e58421c956c6076dc526e24a6be0903219775d862e/numpy-2.3.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:efc81393f25f14d11c9d161e46e6ee348637c0a1e8a54bf9dedc472a3fae993b", size = 14177311, upload-time = "2025-07-24T20:43:29.335Z" }, + { url = "https://files.pythonhosted.org/packages/78/45/d4698c182895af189c463fc91d70805d455a227261d950e4e0f1310c2550/numpy-2.3.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:dd937f088a2df683cbb79dda9a772b62a3e5a8a7e76690612c2737f38c6ef1b6", size = 5106022, upload-time = "2025-07-24T20:43:37.999Z" }, + { url = "https://files.pythonhosted.org/packages/9f/76/3e6880fef4420179309dba72a8c11f6166c431cf6dee54c577af8906f914/numpy-2.3.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = 
"sha256:11e58218c0c46c80509186e460d79fbdc9ca1eb8d8aee39d8f2dc768eb781089", size = 6640135, upload-time = "2025-07-24T20:43:49.28Z" }, + { url = "https://files.pythonhosted.org/packages/34/fa/87ff7f25b3c4ce9085a62554460b7db686fef1e0207e8977795c7b7d7ba1/numpy-2.3.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5ad4ebcb683a1f99f4f392cc522ee20a18b2bb12a2c1c42c3d48d5a1adc9d3d2", size = 14278147, upload-time = "2025-07-24T20:44:10.328Z" }, + { url = "https://files.pythonhosted.org/packages/1d/0f/571b2c7a3833ae419fe69ff7b479a78d313581785203cc70a8db90121b9a/numpy-2.3.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:938065908d1d869c7d75d8ec45f735a034771c6ea07088867f713d1cd3bbbe4f", size = 16635989, upload-time = "2025-07-24T20:44:34.88Z" }, + { url = "https://files.pythonhosted.org/packages/24/5a/84ae8dca9c9a4c592fe11340b36a86ffa9fd3e40513198daf8a97839345c/numpy-2.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:66459dccc65d8ec98cc7df61307b64bf9e08101f9598755d42d8ae65d9a7a6ee", size = 16053052, upload-time = "2025-07-24T20:44:58.872Z" }, + { url = "https://files.pythonhosted.org/packages/57/7c/e5725d99a9133b9813fcf148d3f858df98511686e853169dbaf63aec6097/numpy-2.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a7af9ed2aa9ec5950daf05bb11abc4076a108bd3c7db9aa7251d5f107079b6a6", size = 18577955, upload-time = "2025-07-24T20:45:26.714Z" }, + { url = "https://files.pythonhosted.org/packages/ae/11/7c546fcf42145f29b71e4d6f429e96d8d68e5a7ba1830b2e68d7418f0bbd/numpy-2.3.2-cp313-cp313-win32.whl", hash = "sha256:906a30249315f9c8e17b085cc5f87d3f369b35fedd0051d4a84686967bdbbd0b", size = 6311843, upload-time = "2025-07-24T20:49:24.444Z" }, + { url = "https://files.pythonhosted.org/packages/aa/6f/a428fd1cb7ed39b4280d057720fed5121b0d7754fd2a9768640160f5517b/numpy-2.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:c63d95dc9d67b676e9108fe0d2182987ccb0f11933c1e8959f42fa0da8d4fa56", size = 12782876, upload-time = 
"2025-07-24T20:49:43.227Z" }, + { url = "https://files.pythonhosted.org/packages/65/85/4ea455c9040a12595fb6c43f2c217257c7b52dd0ba332c6a6c1d28b289fe/numpy-2.3.2-cp313-cp313-win_arm64.whl", hash = "sha256:b05a89f2fb84d21235f93de47129dd4f11c16f64c87c33f5e284e6a3a54e43f2", size = 10192786, upload-time = "2025-07-24T20:49:59.443Z" }, + { url = "https://files.pythonhosted.org/packages/80/23/8278f40282d10c3f258ec3ff1b103d4994bcad78b0cba9208317f6bb73da/numpy-2.3.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4e6ecfeddfa83b02318f4d84acf15fbdbf9ded18e46989a15a8b6995dfbf85ab", size = 21047395, upload-time = "2025-07-24T20:45:58.821Z" }, + { url = "https://files.pythonhosted.org/packages/1f/2d/624f2ce4a5df52628b4ccd16a4f9437b37c35f4f8a50d00e962aae6efd7a/numpy-2.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:508b0eada3eded10a3b55725b40806a4b855961040180028f52580c4729916a2", size = 14300374, upload-time = "2025-07-24T20:46:20.207Z" }, + { url = "https://files.pythonhosted.org/packages/f6/62/ff1e512cdbb829b80a6bd08318a58698867bca0ca2499d101b4af063ee97/numpy-2.3.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:754d6755d9a7588bdc6ac47dc4ee97867271b17cee39cb87aef079574366db0a", size = 5228864, upload-time = "2025-07-24T20:46:30.58Z" }, + { url = "https://files.pythonhosted.org/packages/7d/8e/74bc18078fff03192d4032cfa99d5a5ca937807136d6f5790ce07ca53515/numpy-2.3.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a9f66e7d2b2d7712410d3bc5684149040ef5f19856f20277cd17ea83e5006286", size = 6737533, upload-time = "2025-07-24T20:46:46.111Z" }, + { url = "https://files.pythonhosted.org/packages/19/ea/0731efe2c9073ccca5698ef6a8c3667c4cf4eea53fcdcd0b50140aba03bc/numpy-2.3.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de6ea4e5a65d5a90c7d286ddff2b87f3f4ad61faa3db8dabe936b34c2275b6f8", size = 14352007, upload-time = "2025-07-24T20:47:07.1Z" }, + { url = 
"https://files.pythonhosted.org/packages/cf/90/36be0865f16dfed20f4bc7f75235b963d5939707d4b591f086777412ff7b/numpy-2.3.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3ef07ec8cbc8fc9e369c8dcd52019510c12da4de81367d8b20bc692aa07573a", size = 16701914, upload-time = "2025-07-24T20:47:32.459Z" }, + { url = "https://files.pythonhosted.org/packages/94/30/06cd055e24cb6c38e5989a9e747042b4e723535758e6153f11afea88c01b/numpy-2.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:27c9f90e7481275c7800dc9c24b7cc40ace3fdb970ae4d21eaff983a32f70c91", size = 16132708, upload-time = "2025-07-24T20:47:58.129Z" }, + { url = "https://files.pythonhosted.org/packages/9a/14/ecede608ea73e58267fd7cb78f42341b3b37ba576e778a1a06baffbe585c/numpy-2.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:07b62978075b67eee4065b166d000d457c82a1efe726cce608b9db9dd66a73a5", size = 18651678, upload-time = "2025-07-24T20:48:25.402Z" }, + { url = "https://files.pythonhosted.org/packages/40/f3/2fe6066b8d07c3685509bc24d56386534c008b462a488b7f503ba82b8923/numpy-2.3.2-cp313-cp313t-win32.whl", hash = "sha256:c771cfac34a4f2c0de8e8c97312d07d64fd8f8ed45bc9f5726a7e947270152b5", size = 6441832, upload-time = "2025-07-24T20:48:37.181Z" }, + { url = "https://files.pythonhosted.org/packages/0b/ba/0937d66d05204d8f28630c9c60bc3eda68824abde4cf756c4d6aad03b0c6/numpy-2.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:72dbebb2dcc8305c431b2836bcc66af967df91be793d63a24e3d9b741374c450", size = 12927049, upload-time = "2025-07-24T20:48:56.24Z" }, + { url = "https://files.pythonhosted.org/packages/e9/ed/13542dd59c104d5e654dfa2ac282c199ba64846a74c2c4bcdbc3a0f75df1/numpy-2.3.2-cp313-cp313t-win_arm64.whl", hash = "sha256:72c6df2267e926a6d5286b0a6d556ebe49eae261062059317837fda12ddf0c1a", size = 10262935, upload-time = "2025-07-24T20:49:13.136Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/7c/7659048aaf498f7611b783e000c7268fcc4dcf0ce21cd10aad7b2e8f9591/numpy-2.3.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:448a66d052d0cf14ce9865d159bfc403282c9bc7bb2a31b03cc18b651eca8b1a", size = 20950906, upload-time = "2025-07-24T20:50:30.346Z" }, + { url = "https://files.pythonhosted.org/packages/80/db/984bea9d4ddf7112a04cfdfb22b1050af5757864cfffe8e09e44b7f11a10/numpy-2.3.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:546aaf78e81b4081b2eba1d105c3b34064783027a06b3ab20b6eba21fb64132b", size = 14185607, upload-time = "2025-07-24T20:50:51.923Z" }, + { url = "https://files.pythonhosted.org/packages/e4/76/b3d6f414f4eca568f469ac112a3b510938d892bc5a6c190cb883af080b77/numpy-2.3.2-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:87c930d52f45df092f7578889711a0768094debf73cfcde105e2d66954358125", size = 5114110, upload-time = "2025-07-24T20:51:01.041Z" }, + { url = "https://files.pythonhosted.org/packages/9e/d2/6f5e6826abd6bca52392ed88fe44a4b52aacb60567ac3bc86c67834c3a56/numpy-2.3.2-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:8dc082ea901a62edb8f59713c6a7e28a85daddcb67454c839de57656478f5b19", size = 6642050, upload-time = "2025-07-24T20:51:11.64Z" }, + { url = "https://files.pythonhosted.org/packages/c4/43/f12b2ade99199e39c73ad182f103f9d9791f48d885c600c8e05927865baf/numpy-2.3.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af58de8745f7fa9ca1c0c7c943616c6fe28e75d0c81f5c295810e3c83b5be92f", size = 14296292, upload-time = "2025-07-24T20:51:33.488Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f9/77c07d94bf110a916b17210fac38680ed8734c236bfed9982fd8524a7b47/numpy-2.3.2-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed5527c4cf10f16c6d0b6bee1f89958bccb0ad2522c8cadc2efd318bcd545f5", size = 16638913, upload-time = "2025-07-24T20:51:58.517Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/d1/9d9f2c8ea399cc05cfff8a7437453bd4e7d894373a93cdc46361bbb49a7d/numpy-2.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:095737ed986e00393ec18ec0b21b47c22889ae4b0cd2d5e88342e08b01141f58", size = 16071180, upload-time = "2025-07-24T20:52:22.827Z" }, + { url = "https://files.pythonhosted.org/packages/4c/41/82e2c68aff2a0c9bf315e47d61951099fed65d8cb2c8d9dc388cb87e947e/numpy-2.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5e40e80299607f597e1a8a247ff8d71d79c5b52baa11cc1cce30aa92d2da6e0", size = 18576809, upload-time = "2025-07-24T20:52:51.015Z" }, + { url = "https://files.pythonhosted.org/packages/14/14/4b4fd3efb0837ed252d0f583c5c35a75121038a8c4e065f2c259be06d2d8/numpy-2.3.2-cp314-cp314-win32.whl", hash = "sha256:7d6e390423cc1f76e1b8108c9b6889d20a7a1f59d9a60cac4a050fa734d6c1e2", size = 6366410, upload-time = "2025-07-24T20:56:44.949Z" }, + { url = "https://files.pythonhosted.org/packages/11/9e/b4c24a6b8467b61aced5c8dc7dcfce23621baa2e17f661edb2444a418040/numpy-2.3.2-cp314-cp314-win_amd64.whl", hash = "sha256:b9d0878b21e3918d76d2209c924ebb272340da1fb51abc00f986c258cd5e957b", size = 12918821, upload-time = "2025-07-24T20:57:06.479Z" }, + { url = "https://files.pythonhosted.org/packages/0e/0f/0dc44007c70b1007c1cef86b06986a3812dd7106d8f946c09cfa75782556/numpy-2.3.2-cp314-cp314-win_arm64.whl", hash = "sha256:2738534837c6a1d0c39340a190177d7d66fdf432894f469728da901f8f6dc910", size = 10477303, upload-time = "2025-07-24T20:57:22.879Z" }, + { url = "https://files.pythonhosted.org/packages/8b/3e/075752b79140b78ddfc9c0a1634d234cfdbc6f9bbbfa6b7504e445ad7d19/numpy-2.3.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:4d002ecf7c9b53240be3bb69d80f86ddbd34078bae04d87be81c1f58466f264e", size = 21047524, upload-time = "2025-07-24T20:53:22.086Z" }, + { url = "https://files.pythonhosted.org/packages/fe/6d/60e8247564a72426570d0e0ea1151b95ce5bd2f1597bb878a18d32aec855/numpy-2.3.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = 
"sha256:293b2192c6bcce487dbc6326de5853787f870aeb6c43f8f9c6496db5b1781e45", size = 14300519, upload-time = "2025-07-24T20:53:44.053Z" }, + { url = "https://files.pythonhosted.org/packages/4d/73/d8326c442cd428d47a067070c3ac6cc3b651a6e53613a1668342a12d4479/numpy-2.3.2-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:0a4f2021a6da53a0d580d6ef5db29947025ae8b35b3250141805ea9a32bbe86b", size = 5228972, upload-time = "2025-07-24T20:53:53.81Z" }, + { url = "https://files.pythonhosted.org/packages/34/2e/e71b2d6dad075271e7079db776196829019b90ce3ece5c69639e4f6fdc44/numpy-2.3.2-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9c144440db4bf3bb6372d2c3e49834cc0ff7bb4c24975ab33e01199e645416f2", size = 6737439, upload-time = "2025-07-24T20:54:04.742Z" }, + { url = "https://files.pythonhosted.org/packages/15/b0/d004bcd56c2c5e0500ffc65385eb6d569ffd3363cb5e593ae742749b2daa/numpy-2.3.2-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f92d6c2a8535dc4fe4419562294ff957f83a16ebdec66df0805e473ffaad8bd0", size = 14352479, upload-time = "2025-07-24T20:54:25.819Z" }, + { url = "https://files.pythonhosted.org/packages/11/e3/285142fcff8721e0c99b51686426165059874c150ea9ab898e12a492e291/numpy-2.3.2-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cefc2219baa48e468e3db7e706305fcd0c095534a192a08f31e98d83a7d45fb0", size = 16702805, upload-time = "2025-07-24T20:54:50.814Z" }, + { url = "https://files.pythonhosted.org/packages/33/c3/33b56b0e47e604af2c7cd065edca892d180f5899599b76830652875249a3/numpy-2.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:76c3e9501ceb50b2ff3824c3589d5d1ab4ac857b0ee3f8f49629d0de55ecf7c2", size = 16133830, upload-time = "2025-07-24T20:55:17.306Z" }, + { url = "https://files.pythonhosted.org/packages/6e/ae/7b1476a1f4d6a48bc669b8deb09939c56dd2a439db1ab03017844374fb67/numpy-2.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:122bf5ed9a0221b3419672493878ba4967121514b1d7d4656a7580cd11dddcbf", size 
= 18652665, upload-time = "2025-07-24T20:55:46.665Z" }, + { url = "https://files.pythonhosted.org/packages/14/ba/5b5c9978c4bb161034148ade2de9db44ec316fab89ce8c400db0e0c81f86/numpy-2.3.2-cp314-cp314t-win32.whl", hash = "sha256:6f1ae3dcb840edccc45af496f312528c15b1f79ac318169d094e85e4bb35fdf1", size = 6514777, upload-time = "2025-07-24T20:55:57.66Z" }, + { url = "https://files.pythonhosted.org/packages/eb/46/3dbaf0ae7c17cdc46b9f662c56da2054887b8d9e737c1476f335c83d33db/numpy-2.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:087ffc25890d89a43536f75c5fe8770922008758e8eeeef61733957041ed2f9b", size = 13111856, upload-time = "2025-07-24T20:56:17.318Z" }, + { url = "https://files.pythonhosted.org/packages/c1/9e/1652778bce745a67b5fe05adde60ed362d38eb17d919a540e813d30f6874/numpy-2.3.2-cp314-cp314t-win_arm64.whl", hash = "sha256:092aeb3449833ea9c0bf0089d70c29ae480685dd2377ec9cdbbb620257f84631", size = 10544226, upload-time = "2025-07-24T20:56:34.509Z" }, + { url = "https://files.pythonhosted.org/packages/cf/ea/50ebc91d28b275b23b7128ef25c3d08152bc4068f42742867e07a870a42a/numpy-2.3.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:14a91ebac98813a49bc6aa1a0dfc09513dcec1d97eaf31ca21a87221a1cdcb15", size = 21130338, upload-time = "2025-07-24T20:57:54.37Z" }, + { url = "https://files.pythonhosted.org/packages/9f/57/cdd5eac00dd5f137277355c318a955c0d8fb8aa486020c22afd305f8b88f/numpy-2.3.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:71669b5daae692189540cffc4c439468d35a3f84f0c88b078ecd94337f6cb0ec", size = 14375776, upload-time = "2025-07-24T20:58:16.303Z" }, + { url = "https://files.pythonhosted.org/packages/83/85/27280c7f34fcd305c2209c0cdca4d70775e4859a9eaa92f850087f8dea50/numpy-2.3.2-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:69779198d9caee6e547adb933941ed7520f896fd9656834c300bdf4dd8642712", size = 5304882, upload-time = "2025-07-24T20:58:26.199Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/b4/6500b24d278e15dd796f43824e69939d00981d37d9779e32499e823aa0aa/numpy-2.3.2-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:2c3271cc4097beb5a60f010bcc1cc204b300bb3eafb4399376418a83a1c6373c", size = 6818405, upload-time = "2025-07-24T20:58:37.341Z" }, + { url = "https://files.pythonhosted.org/packages/9b/c9/142c1e03f199d202da8e980c2496213509291b6024fd2735ad28ae7065c7/numpy-2.3.2-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8446acd11fe3dc1830568c941d44449fd5cb83068e5c70bd5a470d323d448296", size = 14419651, upload-time = "2025-07-24T20:58:59.048Z" }, + { url = "https://files.pythonhosted.org/packages/8b/95/8023e87cbea31a750a6c00ff9427d65ebc5fef104a136bfa69f76266d614/numpy-2.3.2-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa098a5ab53fa407fded5870865c6275a5cd4101cfdef8d6fafc48286a96e981", size = 16760166, upload-time = "2025-07-24T21:28:56.38Z" }, + { url = "https://files.pythonhosted.org/packages/78/e3/6690b3f85a05506733c7e90b577e4762517404ea78bab2ca3a5cb1aeb78d/numpy-2.3.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6936aff90dda378c09bea075af0d9c675fe3a977a9d2402f95a87f440f59f619", size = 12977811, upload-time = "2025-07-24T21:29:18.234Z" }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, 
+] + +[[package]] +name = "pandas" +version = "2.0.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "python-dateutil", marker = "python_full_version < '3.9'" }, + { name = "pytz", marker = "python_full_version < '3.9'" }, + { name = "tzdata", marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/a7/824332581e258b5aa4f3763ecb2a797e5f9a54269044ba2e50ac19936b32/pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c", size = 5284455, upload-time = "2023-06-28T23:19:33.371Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/b2/0d4a5729ce1ce11630c4fc5d5522a33b967b3ca146c210f58efde7c40e99/pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8", size = 11760908, upload-time = "2023-06-28T23:15:57.001Z" }, + { url = "https://files.pythonhosted.org/packages/4a/f6/f620ca62365d83e663a255a41b08d2fc2eaf304e0b8b21bb6d62a7390fe3/pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f", size = 10823486, upload-time = "2023-06-28T23:16:06.863Z" }, + { url = "https://files.pythonhosted.org/packages/c2/59/cb4234bc9b968c57e81861b306b10cd8170272c57b098b724d3de5eda124/pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183", size = 11571897, upload-time = "2023-06-28T23:16:14.208Z" }, + { url = "https://files.pythonhosted.org/packages/e3/59/35a2892bf09ded9c1bf3804461efe772836a5261ef5dfb4e264ce813ff99/pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0", size = 12306421, upload-time = "2023-06-28T23:16:23.26Z" }, + { url = "https://files.pythonhosted.org/packages/94/71/3a0c25433c54bb29b48e3155b959ac78f4c4f2f06f94d8318aac612cb80f/pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210", size = 9540792, upload-time = "2023-06-28T23:16:30.876Z" }, + { url = "https://files.pythonhosted.org/packages/ed/30/b97456e7063edac0e5a405128065f0cd2033adfe3716fb2256c186bd41d0/pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e", size = 10664333, upload-time = "2023-06-28T23:16:39.209Z" }, + { url = "https://files.pythonhosted.org/packages/b3/92/a5e5133421b49e901a12e02a6a7ef3a0130e10d13db8cb657fdd0cba3b90/pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8", size = 11645672, upload-time = "2023-06-28T23:16:47.601Z" }, + { url = "https://files.pythonhosted.org/packages/8f/bb/aea1fbeed5b474cb8634364718abe9030d7cc7a30bf51f40bd494bbc89a2/pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26", size = 10693229, upload-time = "2023-06-28T23:16:56.397Z" }, + { url = "https://files.pythonhosted.org/packages/d6/90/e7d387f1a416b14e59290baa7a454a90d719baebbf77433ff1bdcc727800/pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d", size = 11581591, upload-time = "2023-06-28T23:17:04.234Z" }, + { url = "https://files.pythonhosted.org/packages/d0/28/88b81881c056376254618fad622a5e94b5126db8c61157ea1910cd1c040a/pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df", size = 12219370, upload-time = 
"2023-06-28T23:17:11.783Z" }, + { url = "https://files.pythonhosted.org/packages/e4/a5/212b9039e25bf8ebb97e417a96660e3dc925dacd3f8653d531b8f7fd9be4/pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd", size = 9482935, upload-time = "2023-06-28T23:17:21.376Z" }, + { url = "https://files.pythonhosted.org/packages/9e/71/756a1be6bee0209d8c0d8c5e3b9fc72c00373f384a4017095ec404aec3ad/pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b", size = 10607692, upload-time = "2023-06-28T23:17:28.824Z" }, + { url = "https://files.pythonhosted.org/packages/78/a8/07dd10f90ca915ed914853cd57f79bfc22e1ef4384ab56cb4336d2fc1f2a/pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061", size = 11653303, upload-time = "2023-06-28T23:17:36.329Z" }, + { url = "https://files.pythonhosted.org/packages/53/c3/f8e87361f7fdf42012def602bfa2a593423c729f5cb7c97aed7f51be66ac/pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5", size = 10710932, upload-time = "2023-06-28T23:17:49.875Z" }, + { url = "https://files.pythonhosted.org/packages/a7/87/828d50c81ce0f434163bf70b925a0eec6076808e0bca312a79322b141f66/pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089", size = 11684018, upload-time = "2023-06-28T23:18:05.845Z" }, + { url = "https://files.pythonhosted.org/packages/f8/7f/5b047effafbdd34e52c9e2d7e44f729a0655efafb22198c45cf692cdc157/pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0", size = 12353723, upload-time = "2023-06-28T23:18:17.631Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/ae/26a2eda7fa581347d69e51f93892493b2074ef3352ac71033c9f32c52389/pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02", size = 9646403, upload-time = "2023-06-28T23:18:24.328Z" }, + { url = "https://files.pythonhosted.org/packages/c3/6c/ea362eef61f05553aaf1a24b3e96b2d0603f5dc71a3bd35688a24ed88843/pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78", size = 10777638, upload-time = "2023-06-28T23:18:30.947Z" }, + { url = "https://files.pythonhosted.org/packages/f8/c7/cfef920b7b457dff6928e824896cb82367650ea127d048ee0b820026db4f/pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b", size = 11834160, upload-time = "2023-06-28T23:18:40.332Z" }, + { url = "https://files.pythonhosted.org/packages/6c/1c/689c9d99bc4e5d366a5fd871f0bcdee98a6581e240f96b78d2d08f103774/pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e", size = 10862752, upload-time = "2023-06-28T23:18:50.016Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b8/4d082f41c27c95bf90485d1447b647cc7e5680fea75e315669dc6e4cb398/pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b", size = 11715852, upload-time = "2023-06-28T23:19:00.594Z" }, + { url = "https://files.pythonhosted.org/packages/9e/0d/91a9fd2c202f2b1d97a38ab591890f86480ecbb596cbc56d035f6f23fdcc/pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641", size = 12398496, upload-time = "2023-06-28T23:19:11.78Z" }, + { url = 
"https://files.pythonhosted.org/packages/26/7d/d8aa0a2c4f3f5f8ea59fb946c8eafe8f508090ca73e2b08a9af853c1103e/pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682", size = 9630766, upload-time = "2023-06-28T23:19:18.182Z" }, + { url = "https://files.pythonhosted.org/packages/9a/f2/0ad053856debbe90c83de1b4f05915f85fd2146f20faf9daa3b320d36df3/pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc", size = 10755902, upload-time = "2023-06-28T23:19:25.151Z" }, +] + +[[package]] +name = "pandas" +version = "2.3.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "python-dateutil", marker = "python_full_version >= '3.9'" }, + { name = "pytz", marker = "python_full_version >= '3.9'" }, + { name = "tzdata", marker = "python_full_version >= '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/6f/75aa71f8a14267117adeeed5d21b204770189c0a0025acbdc03c337b28fc/pandas-2.3.1.tar.gz", hash = "sha256:0a95b9ac964fe83ce317827f80304d37388ea77616b1425f0ae41c9d2d0d7bb2", size = 4487493, upload-time = "2025-07-07T19:20:04.079Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c4/ca/aa97b47287221fa37a49634532e520300088e290b20d690b21ce3e448143/pandas-2.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:22c2e866f7209ebc3a8f08d75766566aae02bcc91d196935a1d9e59c7b990ac9", size = 11542731, upload-time = "2025-07-07T19:18:12.619Z" }, + { url = "https://files.pythonhosted.org/packages/80/bf/7938dddc5f01e18e573dcfb0f1b8c9357d9b5fa6ffdee6e605b92efbdff2/pandas-2.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3583d348546201aff730c8c47e49bc159833f971c2899d6097bce68b9112a4f1", size = 10790031, upload-time = "2025-07-07T19:18:16.611Z" }, + { url = "https://files.pythonhosted.org/packages/ee/2f/9af748366763b2a494fed477f88051dbf06f56053d5c00eba652697e3f94/pandas-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f951fbb702dacd390561e0ea45cdd8ecfa7fb56935eb3dd78e306c19104b9b0", size = 11724083, upload-time = "2025-07-07T19:18:20.512Z" }, + { url = "https://files.pythonhosted.org/packages/2c/95/79ab37aa4c25d1e7df953dde407bb9c3e4ae47d154bc0dd1692f3a6dcf8c/pandas-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd05b72ec02ebfb993569b4931b2e16fbb4d6ad6ce80224a3ee838387d83a191", size = 12342360, upload-time = "2025-07-07T19:18:23.194Z" }, + { url = "https://files.pythonhosted.org/packages/75/a7/d65e5d8665c12c3c6ff5edd9709d5836ec9b6f80071b7f4a718c6106e86e/pandas-2.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1b916a627919a247d865aed068eb65eb91a344b13f5b57ab9f610b7716c92de1", size = 13202098, upload-time = "2025-07-07T19:18:25.558Z" }, + { url = "https://files.pythonhosted.org/packages/65/f3/4c1dbd754dbaa79dbf8b537800cb2fa1a6e534764fef50ab1f7533226c5c/pandas-2.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fe67dc676818c186d5a3d5425250e40f179c2a89145df477dd82945eaea89e97", size = 13837228, upload-time = "2025-07-07T19:18:28.344Z" }, + { url = "https://files.pythonhosted.org/packages/3f/d6/d7f5777162aa9b48ec3910bca5a58c9b5927cfd9cfde3aa64322f5ba4b9f/pandas-2.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:2eb789ae0274672acbd3c575b0598d213345660120a257b47b5dafdc618aec83", size = 
11336561, upload-time = "2025-07-07T19:18:31.211Z" }, + { url = "https://files.pythonhosted.org/packages/76/1c/ccf70029e927e473a4476c00e0d5b32e623bff27f0402d0a92b7fc29bb9f/pandas-2.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2b0540963d83431f5ce8870ea02a7430adca100cec8a050f0811f8e31035541b", size = 11566608, upload-time = "2025-07-07T19:18:33.86Z" }, + { url = "https://files.pythonhosted.org/packages/ec/d3/3c37cb724d76a841f14b8f5fe57e5e3645207cc67370e4f84717e8bb7657/pandas-2.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fe7317f578c6a153912bd2292f02e40c1d8f253e93c599e82620c7f69755c74f", size = 10823181, upload-time = "2025-07-07T19:18:36.151Z" }, + { url = "https://files.pythonhosted.org/packages/8a/4c/367c98854a1251940edf54a4df0826dcacfb987f9068abf3e3064081a382/pandas-2.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6723a27ad7b244c0c79d8e7007092d7c8f0f11305770e2f4cd778b3ad5f9f85", size = 11793570, upload-time = "2025-07-07T19:18:38.385Z" }, + { url = "https://files.pythonhosted.org/packages/07/5f/63760ff107bcf5146eee41b38b3985f9055e710a72fdd637b791dea3495c/pandas-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3462c3735fe19f2638f2c3a40bd94ec2dc5ba13abbb032dd2fa1f540a075509d", size = 12378887, upload-time = "2025-07-07T19:18:41.284Z" }, + { url = "https://files.pythonhosted.org/packages/15/53/f31a9b4dfe73fe4711c3a609bd8e60238022f48eacedc257cd13ae9327a7/pandas-2.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:98bcc8b5bf7afed22cc753a28bc4d9e26e078e777066bc53fac7904ddef9a678", size = 13230957, upload-time = "2025-07-07T19:18:44.187Z" }, + { url = "https://files.pythonhosted.org/packages/e0/94/6fce6bf85b5056d065e0a7933cba2616dcb48596f7ba3c6341ec4bcc529d/pandas-2.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4d544806b485ddf29e52d75b1f559142514e60ef58a832f74fb38e48d757b299", size = 13883883, upload-time = "2025-07-07T19:18:46.498Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/7b/bdcb1ed8fccb63d04bdb7635161d0ec26596d92c9d7a6cce964e7876b6c1/pandas-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:b3cd4273d3cb3707b6fffd217204c52ed92859533e31dc03b7c5008aa933aaab", size = 11340212, upload-time = "2025-07-07T19:18:49.293Z" }, + { url = "https://files.pythonhosted.org/packages/46/de/b8445e0f5d217a99fe0eeb2f4988070908979bec3587c0633e5428ab596c/pandas-2.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:689968e841136f9e542020698ee1c4fbe9caa2ed2213ae2388dc7b81721510d3", size = 11588172, upload-time = "2025-07-07T19:18:52.054Z" }, + { url = "https://files.pythonhosted.org/packages/1e/e0/801cdb3564e65a5ac041ab99ea6f1d802a6c325bb6e58c79c06a3f1cd010/pandas-2.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:025e92411c16cbe5bb2a4abc99732a6b132f439b8aab23a59fa593eb00704232", size = 10717365, upload-time = "2025-07-07T19:18:54.785Z" }, + { url = "https://files.pythonhosted.org/packages/51/a5/c76a8311833c24ae61a376dbf360eb1b1c9247a5d9c1e8b356563b31b80c/pandas-2.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b7ff55f31c4fcb3e316e8f7fa194566b286d6ac430afec0d461163312c5841e", size = 11280411, upload-time = "2025-07-07T19:18:57.045Z" }, + { url = "https://files.pythonhosted.org/packages/da/01/e383018feba0a1ead6cf5fe8728e5d767fee02f06a3d800e82c489e5daaf/pandas-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7dcb79bf373a47d2a40cf7232928eb7540155abbc460925c2c96d2d30b006eb4", size = 11988013, upload-time = "2025-07-07T19:18:59.771Z" }, + { url = "https://files.pythonhosted.org/packages/5b/14/cec7760d7c9507f11c97d64f29022e12a6cc4fc03ac694535e89f88ad2ec/pandas-2.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:56a342b231e8862c96bdb6ab97170e203ce511f4d0429589c8ede1ee8ece48b8", size = 12767210, upload-time = "2025-07-07T19:19:02.944Z" }, + { url = 
"https://files.pythonhosted.org/packages/50/b9/6e2d2c6728ed29fb3d4d4d302504fb66f1a543e37eb2e43f352a86365cdf/pandas-2.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ca7ed14832bce68baef331f4d7f294411bed8efd032f8109d690df45e00c4679", size = 13440571, upload-time = "2025-07-07T19:19:06.82Z" }, + { url = "https://files.pythonhosted.org/packages/80/a5/3a92893e7399a691bad7664d977cb5e7c81cf666c81f89ea76ba2bff483d/pandas-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:ac942bfd0aca577bef61f2bc8da8147c4ef6879965ef883d8e8d5d2dc3e744b8", size = 10987601, upload-time = "2025-07-07T19:19:09.589Z" }, + { url = "https://files.pythonhosted.org/packages/32/ed/ff0a67a2c5505e1854e6715586ac6693dd860fbf52ef9f81edee200266e7/pandas-2.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9026bd4a80108fac2239294a15ef9003c4ee191a0f64b90f170b40cfb7cf2d22", size = 11531393, upload-time = "2025-07-07T19:19:12.245Z" }, + { url = "https://files.pythonhosted.org/packages/c7/db/d8f24a7cc9fb0972adab0cc80b6817e8bef888cfd0024eeb5a21c0bb5c4a/pandas-2.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6de8547d4fdb12421e2d047a2c446c623ff4c11f47fddb6b9169eb98ffba485a", size = 10668750, upload-time = "2025-07-07T19:19:14.612Z" }, + { url = "https://files.pythonhosted.org/packages/0f/b0/80f6ec783313f1e2356b28b4fd8d2148c378370045da918c73145e6aab50/pandas-2.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:782647ddc63c83133b2506912cc6b108140a38a37292102aaa19c81c83db2928", size = 11342004, upload-time = "2025-07-07T19:19:16.857Z" }, + { url = "https://files.pythonhosted.org/packages/e9/e2/20a317688435470872885e7fc8f95109ae9683dec7c50be29b56911515a5/pandas-2.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ba6aff74075311fc88504b1db890187a3cd0f887a5b10f5525f8e2ef55bfdb9", size = 12050869, upload-time = "2025-07-07T19:19:19.265Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/79/20d746b0a96c67203a5bee5fb4e00ac49c3e8009a39e1f78de264ecc5729/pandas-2.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e5635178b387bd2ba4ac040f82bc2ef6e6b500483975c4ebacd34bec945fda12", size = 12750218, upload-time = "2025-07-07T19:19:21.547Z" }, + { url = "https://files.pythonhosted.org/packages/7c/0f/145c8b41e48dbf03dd18fdd7f24f8ba95b8254a97a3379048378f33e7838/pandas-2.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6f3bf5ec947526106399a9e1d26d40ee2b259c66422efdf4de63c848492d91bb", size = 13416763, upload-time = "2025-07-07T19:19:23.939Z" }, + { url = "https://files.pythonhosted.org/packages/b2/c0/54415af59db5cdd86a3d3bf79863e8cc3fa9ed265f0745254061ac09d5f2/pandas-2.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:1c78cf43c8fde236342a1cb2c34bcff89564a7bfed7e474ed2fffa6aed03a956", size = 10987482, upload-time = "2025-07-07T19:19:42.699Z" }, + { url = "https://files.pythonhosted.org/packages/48/64/2fd2e400073a1230e13b8cd604c9bc95d9e3b962e5d44088ead2e8f0cfec/pandas-2.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8dfc17328e8da77be3cf9f47509e5637ba8f137148ed0e9b5241e1baf526e20a", size = 12029159, upload-time = "2025-07-07T19:19:26.362Z" }, + { url = "https://files.pythonhosted.org/packages/d8/0a/d84fd79b0293b7ef88c760d7dca69828d867c89b6d9bc52d6a27e4d87316/pandas-2.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ec6c851509364c59a5344458ab935e6451b31b818be467eb24b0fe89bd05b6b9", size = 11393287, upload-time = "2025-07-07T19:19:29.157Z" }, + { url = "https://files.pythonhosted.org/packages/50/ae/ff885d2b6e88f3c7520bb74ba319268b42f05d7e583b5dded9837da2723f/pandas-2.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:911580460fc4884d9b05254b38a6bfadddfcc6aaef856fb5859e7ca202e45275", size = 11309381, upload-time = "2025-07-07T19:19:31.436Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/86/1fa345fc17caf5d7780d2699985c03dbe186c68fee00b526813939062bb0/pandas-2.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f4d6feeba91744872a600e6edbbd5b033005b431d5ae8379abee5bcfa479fab", size = 11883998, upload-time = "2025-07-07T19:19:34.267Z" }, + { url = "https://files.pythonhosted.org/packages/81/aa/e58541a49b5e6310d89474333e994ee57fea97c8aaa8fc7f00b873059bbf/pandas-2.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fe37e757f462d31a9cd7580236a82f353f5713a80e059a29753cf938c6775d96", size = 12704705, upload-time = "2025-07-07T19:19:36.856Z" }, + { url = "https://files.pythonhosted.org/packages/d5/f9/07086f5b0f2a19872554abeea7658200824f5835c58a106fa8f2ae96a46c/pandas-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5db9637dbc24b631ff3707269ae4559bce4b7fd75c1c4d7e13f40edc42df4444", size = 13189044, upload-time = "2025-07-07T19:19:39.999Z" }, + { url = "https://files.pythonhosted.org/packages/6e/21/ecf2df680982616459409b09962a8c2065330c7151dc6538069f3b634acf/pandas-2.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4645f770f98d656f11c69e81aeb21c6fca076a44bed3dcbb9396a4311bc7f6d8", size = 11567275, upload-time = "2025-07-07T19:19:45.152Z" }, + { url = "https://files.pythonhosted.org/packages/1e/1a/dcb50e44b75419e96b276c9fb023b0f147b3c411be1cd517492aa2a184d4/pandas-2.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:342e59589cc454aaff7484d75b816a433350b3d7964d7847327edda4d532a2e3", size = 10811488, upload-time = "2025-07-07T19:19:47.797Z" }, + { url = "https://files.pythonhosted.org/packages/2d/55/66cd2b679f6a27398380eac7574bc24746128f74626a3c02b978ea00e5ce/pandas-2.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d12f618d80379fde6af007f65f0c25bd3e40251dbd1636480dfffce2cf1e6da", size = 11763000, upload-time = "2025-07-07T19:19:50.83Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/1c/5b9b263c80fd5e231b77df6f78cd7426d1d4ad3a4e858e85b7b3d93d0e9c/pandas-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd71c47a911da120d72ef173aeac0bf5241423f9bfea57320110a978457e069e", size = 12361395, upload-time = "2025-07-07T19:19:53.714Z" }, + { url = "https://files.pythonhosted.org/packages/f7/74/7e817b31413fbb96366ea327d43d1926a9c48c58074e27e094e2839a0e36/pandas-2.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:09e3b1587f0f3b0913e21e8b32c3119174551deb4a4eba4a89bc7377947977e7", size = 13225086, upload-time = "2025-07-07T19:19:56.378Z" }, + { url = "https://files.pythonhosted.org/packages/1f/0f/bc0a44b47eba2f22ae4235719a573d552ef7ad76ed3ea39ae62d554e040b/pandas-2.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2323294c73ed50f612f67e2bf3ae45aea04dce5690778e08a09391897f35ff88", size = 13871698, upload-time = "2025-07-07T19:19:58.854Z" }, + { url = "https://files.pythonhosted.org/packages/fa/cb/6c32f8fadefa4314b740fbe8f74f6a02423bd1549e7c930826df35ac3c1b/pandas-2.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:b4b0de34dc8499c2db34000ef8baad684cfa4cbd836ecee05f323ebfba348c7d", size = 11357186, upload-time = "2025-07-07T19:20:01.475Z" }, +] + +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, +] + +[[package]] +name = "platformdirs" +version = 
"4.3.6" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/13/fc/128cc9cb8f03208bdbf93d3aa862e16d376844a14f9a0ce5cf4507372de4/platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907", size = 21302, upload-time = "2024-09-17T19:06:50.688Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/a6/bc1012356d8ece4d66dd75c4b9fc6c1f6650ddd5991e421177d9f8f671be/platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb", size = 18439, upload-time = "2024-09-17T19:06:49.212Z" }, +] + +[[package]] +name = "platformdirs" +version = "4.3.8" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362, upload-time = "2025-05-07T22:47:42.121Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" }, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", 
size = 67955, upload-time = "2024-04-20T21:34:42.531Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556, upload-time = "2024-04-20T21:34:40.434Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pytest" +version = "8.3.5" +source = { 
registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "colorama", marker = "python_full_version < '3.9' and sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.9'" }, + { name = "iniconfig", marker = "python_full_version < '3.9'" }, + { name = "packaging", marker = "python_full_version < '3.9'" }, + { name = "pluggy", version = "1.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "tomli", marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891, upload-time = "2025-03-02T12:54:54.503Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634, upload-time = "2025-03-02T12:54:52.069Z" }, +] + +[[package]] +name = "pytest" +version = "8.4.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "colorama", marker = "python_full_version >= '3.9' and sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, + { name = "iniconfig", marker = "python_full_version >= '3.9'" }, + { name = "packaging", marker = "python_full_version >= '3.9'" }, + { name = "pluggy", version = "1.6.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pygments", marker = 
"python_full_version >= '3.9'" }, + { name = "tomli", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" }, +] + +[[package]] +name = "pytest-asyncio" +version = "0.24.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "pytest", version = "8.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/52/6d/c6cf50ce320cf8611df7a1254d86233b3df7cc07f9b5f5cbcb82e08aa534/pytest_asyncio-0.24.0.tar.gz", hash = "sha256:d081d828e576d85f875399194281e92bf8a68d60d72d1a2faf2feddb6c46b276", size = 49855, upload-time = "2024-08-22T08:03:18.145Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/31/6607dab48616902f76885dfcf62c08d929796fc3b2d2318faf9fd54dbed9/pytest_asyncio-0.24.0-py3-none-any.whl", hash = "sha256:a811296ed596b69bf0b6f3dc40f83bcaf341b155a269052d82efa2b25ac7037b", size = 18024, upload-time = "2024-08-22T08:03:15.536Z" }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "backports-asyncio-runner", 
marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, + { name = "pytest", version = "8.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "typing-extensions", version = "4.14.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4e/51/f8794af39eeb870e87a8c8068642fc07bce0c854d6865d7dd0f2a9d338c2/pytest_asyncio-1.1.0.tar.gz", hash = "sha256:796aa822981e01b68c12e4827b8697108f7205020f24b5793b3c41555dab68ea", size = 46652, upload-time = "2025-07-16T04:29:26.393Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/9d/bf86eddabf8c6c9cb1ea9a869d6873b46f105a5d292d3a6f7071f5b07935/pytest_asyncio-1.1.0-py3-none-any.whl", hash = "sha256:5fe2d69607b0bd75c656d1211f969cadba035030156745ee09e7d71740e58ecf", size = 15157, upload-time = "2025-07-16T04:29:24.929Z" }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + +[[package]] +name = "pytz" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", 
hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, +] + +[[package]] +name = "requests" +version = "2.32.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "urllib3", version = "2.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/0a/929373653770d8a0d7ea76c37de6e41f11eb07559b103b1c02cafb3f7cf8/requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422", size = 135258, upload-time = "2025-06-09T16:43:07.34Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847, upload-time = "2025-06-09T16:43:05.728Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + +[[package]] +name = "tomli" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/87/302344fed471e44a87289cf4967697d07e532f2421fdaf868a303cbae4ff/tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", size = 17175, upload-time = "2024-11-27T22:38:36.873Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/ca/75707e6efa2b37c77dadb324ae7d9571cb424e61ea73fad7c56c2d14527f/tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", size = 131077, upload-time = "2024-11-27T22:37:54.956Z" }, + { url = "https://files.pythonhosted.org/packages/c7/16/51ae563a8615d472fdbffc43a3f3d46588c264ac4f024f63f01283becfbb/tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", size = 123429, upload-time = "2024-11-27T22:37:56.698Z" }, + { url = "https://files.pythonhosted.org/packages/f1/dd/4f6cd1e7b160041db83c694abc78e100473c15d54620083dbd5aae7b990e/tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", size = 226067, upload-time = "2024-11-27T22:37:57.63Z" }, + { url = "https://files.pythonhosted.org/packages/a9/6b/c54ede5dc70d648cc6361eaf429304b02f2871a345bbdd51e993d6cdf550/tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", size = 236030, upload-time = "2024-11-27T22:37:59.344Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/47/999514fa49cfaf7a92c805a86c3c43f4215621855d151b61c602abb38091/tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", size = 240898, upload-time = "2024-11-27T22:38:00.429Z" }, + { url = "https://files.pythonhosted.org/packages/73/41/0a01279a7ae09ee1573b423318e7934674ce06eb33f50936655071d81a24/tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", size = 229894, upload-time = "2024-11-27T22:38:02.094Z" }, + { url = "https://files.pythonhosted.org/packages/55/18/5d8bc5b0a0362311ce4d18830a5d28943667599a60d20118074ea1b01bb7/tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", size = 245319, upload-time = "2024-11-27T22:38:03.206Z" }, + { url = "https://files.pythonhosted.org/packages/92/a3/7ade0576d17f3cdf5ff44d61390d4b3febb8a9fc2b480c75c47ea048c646/tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", size = 238273, upload-time = "2024-11-27T22:38:04.217Z" }, + { url = "https://files.pythonhosted.org/packages/72/6f/fa64ef058ac1446a1e51110c375339b3ec6be245af9d14c87c4a6412dd32/tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", size = 98310, upload-time = "2024-11-27T22:38:05.908Z" }, + { url = "https://files.pythonhosted.org/packages/6a/1c/4a2dcde4a51b81be3530565e92eda625d94dafb46dbeb15069df4caffc34/tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", size = 108309, upload-time = "2024-11-27T22:38:06.812Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/e1/f8af4c2fcde17500422858155aeb0d7e93477a0d59a98e56cbfe75070fd0/tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", size = 132762, upload-time = "2024-11-27T22:38:07.731Z" }, + { url = "https://files.pythonhosted.org/packages/03/b8/152c68bb84fc00396b83e7bbddd5ec0bd3dd409db4195e2a9b3e398ad2e3/tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", size = 123453, upload-time = "2024-11-27T22:38:09.384Z" }, + { url = "https://files.pythonhosted.org/packages/c8/d6/fc9267af9166f79ac528ff7e8c55c8181ded34eb4b0e93daa767b8841573/tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", size = 233486, upload-time = "2024-11-27T22:38:10.329Z" }, + { url = "https://files.pythonhosted.org/packages/5c/51/51c3f2884d7bab89af25f678447ea7d297b53b5a3b5730a7cb2ef6069f07/tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", size = 242349, upload-time = "2024-11-27T22:38:11.443Z" }, + { url = "https://files.pythonhosted.org/packages/ab/df/bfa89627d13a5cc22402e441e8a931ef2108403db390ff3345c05253935e/tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", size = 252159, upload-time = "2024-11-27T22:38:13.099Z" }, + { url = "https://files.pythonhosted.org/packages/9e/6e/fa2b916dced65763a5168c6ccb91066f7639bdc88b48adda990db10c8c0b/tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", size = 237243, upload-time = "2024-11-27T22:38:14.766Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/04/885d3b1f650e1153cbb93a6a9782c58a972b94ea4483ae4ac5cedd5e4a09/tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", size = 259645, upload-time = "2024-11-27T22:38:15.843Z" }, + { url = "https://files.pythonhosted.org/packages/9c/de/6b432d66e986e501586da298e28ebeefd3edc2c780f3ad73d22566034239/tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", size = 244584, upload-time = "2024-11-27T22:38:17.645Z" }, + { url = "https://files.pythonhosted.org/packages/1c/9a/47c0449b98e6e7d1be6cbac02f93dd79003234ddc4aaab6ba07a9a7482e2/tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", size = 98875, upload-time = "2024-11-27T22:38:19.159Z" }, + { url = "https://files.pythonhosted.org/packages/ef/60/9b9638f081c6f1261e2688bd487625cd1e660d0a85bd469e91d8db969734/tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", size = 109418, upload-time = "2024-11-27T22:38:20.064Z" }, + { url = "https://files.pythonhosted.org/packages/04/90/2ee5f2e0362cb8a0b6499dc44f4d7d48f8fff06d28ba46e6f1eaa61a1388/tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7", size = 132708, upload-time = "2024-11-27T22:38:21.659Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ec/46b4108816de6b385141f082ba99e315501ccd0a2ea23db4a100dd3990ea/tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c", size = 123582, upload-time = "2024-11-27T22:38:22.693Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/bd/b470466d0137b37b68d24556c38a0cc819e8febe392d5b199dcd7f578365/tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13", size = 232543, upload-time = "2024-11-27T22:38:24.367Z" }, + { url = "https://files.pythonhosted.org/packages/d9/e5/82e80ff3b751373f7cead2815bcbe2d51c895b3c990686741a8e56ec42ab/tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281", size = 241691, upload-time = "2024-11-27T22:38:26.081Z" }, + { url = "https://files.pythonhosted.org/packages/05/7e/2a110bc2713557d6a1bfb06af23dd01e7dde52b6ee7dadc589868f9abfac/tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272", size = 251170, upload-time = "2024-11-27T22:38:27.921Z" }, + { url = "https://files.pythonhosted.org/packages/64/7b/22d713946efe00e0adbcdfd6d1aa119ae03fd0b60ebed51ebb3fa9f5a2e5/tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140", size = 236530, upload-time = "2024-11-27T22:38:29.591Z" }, + { url = "https://files.pythonhosted.org/packages/38/31/3a76f67da4b0cf37b742ca76beaf819dca0ebef26d78fc794a576e08accf/tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2", size = 258666, upload-time = "2024-11-27T22:38:30.639Z" }, + { url = "https://files.pythonhosted.org/packages/07/10/5af1293da642aded87e8a988753945d0cf7e00a9452d3911dd3bb354c9e2/tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744", size = 243954, upload-time = "2024-11-27T22:38:31.702Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/b9/1ed31d167be802da0fc95020d04cd27b7d7065cc6fbefdd2f9186f60d7bd/tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec", size = 98724, upload-time = "2024-11-27T22:38:32.837Z" }, + { url = "https://files.pythonhosted.org/packages/c7/32/b0963458706accd9afcfeb867c0f9175a741bf7b19cd424230714d722198/tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69", size = 109383, upload-time = "2024-11-27T22:38:34.455Z" }, + { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257, upload-time = "2024-11-27T22:38:35.385Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.13.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967, upload-time = "2025-04-10T14:19:05.416Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806, upload-time = "2025-04-10T14:19:03.967Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.14.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = 
"https://files.pythonhosted.org/packages/98/5a/da40306b885cc8c09109dc2e1abd358d5684b1425678151cdaed4731c822/typing_extensions-4.14.1.tar.gz", hash = "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36", size = 107673, upload-time = "2025-07-04T13:28:34.16Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/00/d631e67a838026495268c2f6884f3711a15a9a2a96cd244fdaea53b823fb/typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76", size = 43906, upload-time = "2025-07-04T13:28:32.743Z" }, +] + +[[package]] +name = "tzdata" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload-time = "2025-03-23T13:54:43.652Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, +] + +[[package]] +name = "urllib3" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/63/22ba4ebfe7430b76388e7cd448d5478814d3032121827c12a2cc287e2260/urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9", size = 300677, upload-time = "2024-09-12T10:52:18.401Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", 
size = 126338, upload-time = "2024-09-12T10:52:16.589Z" }, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, +] + +[[package]] +name = "websockets" +version = "13.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/e2/73/9223dbc7be3dcaf2a7bbf756c351ec8da04b1fa573edaf545b95f6b0c7fd/websockets-13.1.tar.gz", hash = "sha256:a3b3366087c1bc0a2795111edcadddb8b3b59509d5db5d7ea3fdd69f954a8878", size = 158549, upload-time = "2024-09-21T17:34:21.54Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/94/d15dbfc6a5eb636dbc754303fba18208f2e88cf97e733e1d64fb9cb5c89e/websockets-13.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f48c749857f8fb598fb890a75f540e3221d0976ed0bf879cf3c7eef34151acee", size = 157815, upload-time = "2024-09-21T17:32:27.107Z" }, + { url = "https://files.pythonhosted.org/packages/30/02/c04af33f4663945a26f5e8cf561eb140c35452b50af47a83c3fbcfe62ae1/websockets-13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c7e72ce6bda6fb9409cc1e8164dd41d7c91466fb599eb047cfda72fe758a34a7", size = 155466, upload-time = 
"2024-09-21T17:32:28.428Z" }, + { url = "https://files.pythonhosted.org/packages/35/e8/719f08d12303ea643655e52d9e9851b2dadbb1991d4926d9ce8862efa2f5/websockets-13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f779498eeec470295a2b1a5d97aa1bc9814ecd25e1eb637bd9d1c73a327387f6", size = 155716, upload-time = "2024-09-21T17:32:29.905Z" }, + { url = "https://files.pythonhosted.org/packages/91/e1/14963ae0252a8925f7434065d25dcd4701d5e281a0b4b460a3b5963d2594/websockets-13.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4676df3fe46956fbb0437d8800cd5f2b6d41143b6e7e842e60554398432cf29b", size = 164806, upload-time = "2024-09-21T17:32:31.384Z" }, + { url = "https://files.pythonhosted.org/packages/ec/fa/ab28441bae5e682a0f7ddf3d03440c0c352f930da419301f4a717f675ef3/websockets-13.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7affedeb43a70351bb811dadf49493c9cfd1ed94c9c70095fd177e9cc1541fa", size = 163810, upload-time = "2024-09-21T17:32:32.384Z" }, + { url = "https://files.pythonhosted.org/packages/44/77/dea187bd9d16d4b91566a2832be31f99a40d0f5bfa55eeb638eb2c3bc33d/websockets-13.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1971e62d2caa443e57588e1d82d15f663b29ff9dfe7446d9964a4b6f12c1e700", size = 164125, upload-time = "2024-09-21T17:32:33.398Z" }, + { url = "https://files.pythonhosted.org/packages/cf/d9/3af14544e83f1437eb684b399e6ba0fa769438e869bf5d83d74bc197fae8/websockets-13.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5f2e75431f8dc4a47f31565a6e1355fb4f2ecaa99d6b89737527ea917066e26c", size = 164532, upload-time = "2024-09-21T17:32:35.109Z" }, + { url = "https://files.pythonhosted.org/packages/1c/8a/6d332eabe7d59dfefe4b8ba6f46c8c5fabb15b71c8a8bc3d2b65de19a7b6/websockets-13.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:58cf7e75dbf7e566088b07e36ea2e3e2bd5676e22216e4cad108d4df4a7402a0", size = 
163948, upload-time = "2024-09-21T17:32:36.214Z" }, + { url = "https://files.pythonhosted.org/packages/1a/91/a0aeadbaf3017467a1ee03f8fb67accdae233fe2d5ad4b038c0a84e357b0/websockets-13.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c90d6dec6be2c7d03378a574de87af9b1efea77d0c52a8301dd831ece938452f", size = 163898, upload-time = "2024-09-21T17:32:37.277Z" }, + { url = "https://files.pythonhosted.org/packages/71/31/a90fb47c63e0ae605be914b0b969d7c6e6ffe2038cd744798e4b3fbce53b/websockets-13.1-cp310-cp310-win32.whl", hash = "sha256:730f42125ccb14602f455155084f978bd9e8e57e89b569b4d7f0f0c17a448ffe", size = 158706, upload-time = "2024-09-21T17:32:38.755Z" }, + { url = "https://files.pythonhosted.org/packages/93/ca/9540a9ba80da04dc7f36d790c30cae4252589dbd52ccdc92e75b0be22437/websockets-13.1-cp310-cp310-win_amd64.whl", hash = "sha256:5993260f483d05a9737073be197371940c01b257cc45ae3f1d5d7adb371b266a", size = 159141, upload-time = "2024-09-21T17:32:40.495Z" }, + { url = "https://files.pythonhosted.org/packages/b2/f0/cf0b8a30d86b49e267ac84addbebbc7a48a6e7bb7c19db80f62411452311/websockets-13.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:61fc0dfcda609cda0fc9fe7977694c0c59cf9d749fbb17f4e9483929e3c48a19", size = 157813, upload-time = "2024-09-21T17:32:42.188Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e7/22285852502e33071a8cf0ac814f8988480ec6db4754e067b8b9d0e92498/websockets-13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ceec59f59d092c5007e815def4ebb80c2de330e9588e101cf8bd94c143ec78a5", size = 155469, upload-time = "2024-09-21T17:32:43.858Z" }, + { url = "https://files.pythonhosted.org/packages/68/d4/c8c7c1e5b40ee03c5cc235955b0fb1ec90e7e37685a5f69229ad4708dcde/websockets-13.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1dca61c6db1166c48b95198c0b7d9c990b30c756fc2923cc66f68d17dc558fd", size = 155717, upload-time = "2024-09-21T17:32:44.914Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/e4/c50999b9b848b1332b07c7fd8886179ac395cb766fda62725d1539e7bc6c/websockets-13.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:308e20f22c2c77f3f39caca508e765f8725020b84aa963474e18c59accbf4c02", size = 165379, upload-time = "2024-09-21T17:32:45.933Z" }, + { url = "https://files.pythonhosted.org/packages/bc/49/4a4ad8c072f18fd79ab127650e47b160571aacfc30b110ee305ba25fffc9/websockets-13.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62d516c325e6540e8a57b94abefc3459d7dab8ce52ac75c96cad5549e187e3a7", size = 164376, upload-time = "2024-09-21T17:32:46.987Z" }, + { url = "https://files.pythonhosted.org/packages/af/9b/8c06d425a1d5a74fd764dd793edd02be18cf6fc3b1ccd1f29244ba132dc0/websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87c6e35319b46b99e168eb98472d6c7d8634ee37750d7693656dc766395df096", size = 164753, upload-time = "2024-09-21T17:32:48.046Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5b/0acb5815095ff800b579ffc38b13ab1b915b317915023748812d24e0c1ac/websockets-13.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5f9fee94ebafbc3117c30be1844ed01a3b177bb6e39088bc6b2fa1dc15572084", size = 165051, upload-time = "2024-09-21T17:32:49.271Z" }, + { url = "https://files.pythonhosted.org/packages/30/93/c3891c20114eacb1af09dedfcc620c65c397f4fd80a7009cd12d9457f7f5/websockets-13.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7c1e90228c2f5cdde263253fa5db63e6653f1c00e7ec64108065a0b9713fa1b3", size = 164489, upload-time = "2024-09-21T17:32:50.392Z" }, + { url = "https://files.pythonhosted.org/packages/28/09/af9e19885539759efa2e2cd29b8b3f9eecef7ecefea40d46612f12138b36/websockets-13.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6548f29b0e401eea2b967b2fdc1c7c7b5ebb3eeb470ed23a54cd45ef078a0db9", size = 164438, upload-time = 
"2024-09-21T17:32:52.223Z" }, + { url = "https://files.pythonhosted.org/packages/b6/08/6f38b8e625b3d93de731f1d248cc1493327f16cb45b9645b3e791782cff0/websockets-13.1-cp311-cp311-win32.whl", hash = "sha256:c11d4d16e133f6df8916cc5b7e3e96ee4c44c936717d684a94f48f82edb7c92f", size = 158710, upload-time = "2024-09-21T17:32:53.244Z" }, + { url = "https://files.pythonhosted.org/packages/fb/39/ec8832ecb9bb04a8d318149005ed8cee0ba4e0205835da99e0aa497a091f/websockets-13.1-cp311-cp311-win_amd64.whl", hash = "sha256:d04f13a1d75cb2b8382bdc16ae6fa58c97337253826dfe136195b7f89f661557", size = 159137, upload-time = "2024-09-21T17:32:54.721Z" }, + { url = "https://files.pythonhosted.org/packages/df/46/c426282f543b3c0296cf964aa5a7bb17e984f58dde23460c3d39b3148fcf/websockets-13.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9d75baf00138f80b48f1eac72ad1535aac0b6461265a0bcad391fc5aba875cfc", size = 157821, upload-time = "2024-09-21T17:32:56.442Z" }, + { url = "https://files.pythonhosted.org/packages/aa/85/22529867010baac258da7c45848f9415e6cf37fef00a43856627806ffd04/websockets-13.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9b6f347deb3dcfbfde1c20baa21c2ac0751afaa73e64e5b693bb2b848efeaa49", size = 155480, upload-time = "2024-09-21T17:32:57.698Z" }, + { url = "https://files.pythonhosted.org/packages/29/2c/bdb339bfbde0119a6e84af43ebf6275278698a2241c2719afc0d8b0bdbf2/websockets-13.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de58647e3f9c42f13f90ac7e5f58900c80a39019848c5547bc691693098ae1bd", size = 155715, upload-time = "2024-09-21T17:32:59.429Z" }, + { url = "https://files.pythonhosted.org/packages/9f/d0/8612029ea04c5c22bf7af2fd3d63876c4eaeef9b97e86c11972a43aa0e6c/websockets-13.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1b54689e38d1279a51d11e3467dd2f3a50f5f2e879012ce8f2d6943f00e83f0", size = 165647, upload-time = "2024-09-21T17:33:00.495Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/04/1681ed516fa19ca9083f26d3f3a302257e0911ba75009533ed60fbb7b8d1/websockets-13.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf1781ef73c073e6b0f90af841aaf98501f975d306bbf6221683dd594ccc52b6", size = 164592, upload-time = "2024-09-21T17:33:02.223Z" }, + { url = "https://files.pythonhosted.org/packages/38/6f/a96417a49c0ed132bb6087e8e39a37db851c70974f5c724a4b2a70066996/websockets-13.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d23b88b9388ed85c6faf0e74d8dec4f4d3baf3ecf20a65a47b836d56260d4b9", size = 165012, upload-time = "2024-09-21T17:33:03.288Z" }, + { url = "https://files.pythonhosted.org/packages/40/8b/fccf294919a1b37d190e86042e1a907b8f66cff2b61e9befdbce03783e25/websockets-13.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3c78383585f47ccb0fcf186dcb8a43f5438bd7d8f47d69e0b56f71bf431a0a68", size = 165311, upload-time = "2024-09-21T17:33:04.728Z" }, + { url = "https://files.pythonhosted.org/packages/c1/61/f8615cf7ce5fe538476ab6b4defff52beb7262ff8a73d5ef386322d9761d/websockets-13.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d6d300f8ec35c24025ceb9b9019ae9040c1ab2f01cddc2bcc0b518af31c75c14", size = 164692, upload-time = "2024-09-21T17:33:05.829Z" }, + { url = "https://files.pythonhosted.org/packages/5c/f1/a29dd6046d3a722d26f182b783a7997d25298873a14028c4760347974ea3/websockets-13.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a9dcaf8b0cc72a392760bb8755922c03e17a5a54e08cca58e8b74f6902b433cf", size = 164686, upload-time = "2024-09-21T17:33:06.823Z" }, + { url = "https://files.pythonhosted.org/packages/0f/99/ab1cdb282f7e595391226f03f9b498f52109d25a2ba03832e21614967dfa/websockets-13.1-cp312-cp312-win32.whl", hash = "sha256:2f85cf4f2a1ba8f602298a853cec8526c2ca42a9a4b947ec236eaedb8f2dc80c", size = 158712, upload-time = "2024-09-21T17:33:07.877Z" }, + { url = 
"https://files.pythonhosted.org/packages/46/93/e19160db48b5581feac8468330aa11b7292880a94a37d7030478596cc14e/websockets-13.1-cp312-cp312-win_amd64.whl", hash = "sha256:38377f8b0cdeee97c552d20cf1865695fcd56aba155ad1b4ca8779a5b6ef4ac3", size = 159145, upload-time = "2024-09-21T17:33:09.202Z" }, + { url = "https://files.pythonhosted.org/packages/51/20/2b99ca918e1cbd33c53db2cace5f0c0cd8296fc77558e1908799c712e1cd/websockets-13.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a9ab1e71d3d2e54a0aa646ab6d4eebfaa5f416fe78dfe4da2839525dc5d765c6", size = 157828, upload-time = "2024-09-21T17:33:10.987Z" }, + { url = "https://files.pythonhosted.org/packages/b8/47/0932a71d3d9c0e9483174f60713c84cee58d62839a143f21a2bcdbd2d205/websockets-13.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b9d7439d7fab4dce00570bb906875734df13d9faa4b48e261c440a5fec6d9708", size = 155487, upload-time = "2024-09-21T17:33:12.153Z" }, + { url = "https://files.pythonhosted.org/packages/a9/60/f1711eb59ac7a6c5e98e5637fef5302f45b6f76a2c9d64fd83bbb341377a/websockets-13.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:327b74e915cf13c5931334c61e1a41040e365d380f812513a255aa804b183418", size = 155721, upload-time = "2024-09-21T17:33:13.909Z" }, + { url = "https://files.pythonhosted.org/packages/6a/e6/ba9a8db7f9d9b0e5f829cf626ff32677f39824968317223605a6b419d445/websockets-13.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:325b1ccdbf5e5725fdcb1b0e9ad4d2545056479d0eee392c291c1bf76206435a", size = 165609, upload-time = "2024-09-21T17:33:14.967Z" }, + { url = "https://files.pythonhosted.org/packages/c1/22/4ec80f1b9c27a0aebd84ccd857252eda8418ab9681eb571b37ca4c5e1305/websockets-13.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:346bee67a65f189e0e33f520f253d5147ab76ae42493804319b5716e46dddf0f", size = 164556, upload-time = "2024-09-21T17:33:17.113Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/ac/35f423cb6bb15600438db80755609d27eda36d4c0b3c9d745ea12766c45e/websockets-13.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91a0fa841646320ec0d3accdff5b757b06e2e5c86ba32af2e0815c96c7a603c5", size = 164993, upload-time = "2024-09-21T17:33:18.168Z" }, + { url = "https://files.pythonhosted.org/packages/31/4e/98db4fd267f8be9e52e86b6ee4e9aa7c42b83452ea0ea0672f176224b977/websockets-13.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:18503d2c5f3943e93819238bf20df71982d193f73dcecd26c94514f417f6b135", size = 165360, upload-time = "2024-09-21T17:33:19.233Z" }, + { url = "https://files.pythonhosted.org/packages/3f/15/3f0de7cda70ffc94b7e7024544072bc5b26e2c1eb36545291abb755d8cdb/websockets-13.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a9cd1af7e18e5221d2878378fbc287a14cd527fdd5939ed56a18df8a31136bb2", size = 164745, upload-time = "2024-09-21T17:33:20.361Z" }, + { url = "https://files.pythonhosted.org/packages/a1/6e/66b6b756aebbd680b934c8bdbb6dcb9ce45aad72cde5f8a7208dbb00dd36/websockets-13.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:70c5be9f416aa72aab7a2a76c90ae0a4fe2755c1816c153c1a2bcc3333ce4ce6", size = 164732, upload-time = "2024-09-21T17:33:23.103Z" }, + { url = "https://files.pythonhosted.org/packages/35/c6/12e3aab52c11aeb289e3dbbc05929e7a9d90d7a9173958477d3ef4f8ce2d/websockets-13.1-cp313-cp313-win32.whl", hash = "sha256:624459daabeb310d3815b276c1adef475b3e6804abaf2d9d2c061c319f7f187d", size = 158709, upload-time = "2024-09-21T17:33:24.196Z" }, + { url = "https://files.pythonhosted.org/packages/41/d8/63d6194aae711d7263df4498200c690a9c39fb437ede10f3e157a6343e0d/websockets-13.1-cp313-cp313-win_amd64.whl", hash = "sha256:c518e84bb59c2baae725accd355c8dc517b4a3ed8db88b4bc93c78dae2974bf2", size = 159144, upload-time = "2024-09-21T17:33:25.96Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/69/59872420e5bce60db166d6fba39ee24c719d339fb0ae48cb2ce580129882/websockets-13.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c7934fd0e920e70468e676fe7f1b7261c1efa0d6c037c6722278ca0228ad9d0d", size = 157811, upload-time = "2024-09-21T17:33:27.379Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f7/0610032e0d3981758fdd6ee7c68cc02ebf668a762c5178d3d91748228849/websockets-13.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:149e622dc48c10ccc3d2760e5f36753db9cacf3ad7bc7bbbfd7d9c819e286f23", size = 155471, upload-time = "2024-09-21T17:33:28.473Z" }, + { url = "https://files.pythonhosted.org/packages/55/2f/c43173a72ea395263a427a36d25bce2675f41c809424466a13c61a9a2d61/websockets-13.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a569eb1b05d72f9bce2ebd28a1ce2054311b66677fcd46cf36204ad23acead8c", size = 155713, upload-time = "2024-09-21T17:33:29.795Z" }, + { url = "https://files.pythonhosted.org/packages/92/7e/8fa930c6426a56c47910792717787640329e4a0e37cdfda20cf89da67126/websockets-13.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95df24ca1e1bd93bbca51d94dd049a984609687cb2fb08a7f2c56ac84e9816ea", size = 164995, upload-time = "2024-09-21T17:33:30.802Z" }, + { url = "https://files.pythonhosted.org/packages/27/29/50ed4c68a3f606565a2db4b13948ae7b6f6c53aa9f8f258d92be6698d276/websockets-13.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8dbb1bf0c0a4ae8b40bdc9be7f644e2f3fb4e8a9aca7145bfa510d4a374eeb7", size = 164057, upload-time = "2024-09-21T17:33:31.862Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0e/60da63b1c53c47f389f79312b3356cb305600ffad1274d7ec473128d4e6b/websockets-13.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:035233b7531fb92a76beefcbf479504db8c72eb3bff41da55aecce3a0f729e54", size = 164340, upload-time = "2024-09-21T17:33:33.022Z" }, + 
{ url = "https://files.pythonhosted.org/packages/20/ef/d87c5fc0aa7fafad1d584b6459ddfe062edf0d0dd64800a02e67e5de048b/websockets-13.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:e4450fc83a3df53dec45922b576e91e94f5578d06436871dce3a6be38e40f5db", size = 164222, upload-time = "2024-09-21T17:33:34.423Z" }, + { url = "https://files.pythonhosted.org/packages/f2/c4/7916e1f6b5252d3dcb9121b67d7fdbb2d9bf5067a6d8c88885ba27a9e69c/websockets-13.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:463e1c6ec853202dd3657f156123d6b4dad0c546ea2e2e38be2b3f7c5b8e7295", size = 163647, upload-time = "2024-09-21T17:33:35.841Z" }, + { url = "https://files.pythonhosted.org/packages/de/df/2ebebb807f10993c35c10cbd3628a7944b66bd5fb6632a561f8666f3a68e/websockets-13.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6d6855bbe70119872c05107e38fbc7f96b1d8cb047d95c2c50869a46c65a8e96", size = 163590, upload-time = "2024-09-21T17:33:37.61Z" }, + { url = "https://files.pythonhosted.org/packages/b5/82/d48911f56bb993c11099a1ff1d4041d9d1481d50271100e8ee62bc28f365/websockets-13.1-cp38-cp38-win32.whl", hash = "sha256:204e5107f43095012b00f1451374693267adbb832d29966a01ecc4ce1db26faf", size = 158701, upload-time = "2024-09-21T17:33:38.695Z" }, + { url = "https://files.pythonhosted.org/packages/8b/b3/945aacb21fc89ad150403cbaa974c9e846f098f16d9f39a3dd6094f9beb1/websockets-13.1-cp38-cp38-win_amd64.whl", hash = "sha256:485307243237328c022bc908b90e4457d0daa8b5cf4b3723fd3c4a8012fce4c6", size = 159146, upload-time = "2024-09-21T17:33:39.855Z" }, + { url = "https://files.pythonhosted.org/packages/61/26/5f7a7fb03efedb4f90ed61968338bfe7c389863b0ceda239b94ae61c5ae4/websockets-13.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9b37c184f8b976f0c0a231a5f3d6efe10807d41ccbe4488df8c74174805eea7d", size = 157810, upload-time = "2024-09-21T17:33:40.94Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/d4/9b4814a07dffaa7a79d71b4944d10836f9adbd527a113f6675734ef3abed/websockets-13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:163e7277e1a0bd9fb3c8842a71661ad19c6aa7bb3d6678dc7f89b17fbcc4aeb7", size = 155467, upload-time = "2024-09-21T17:33:42.075Z" }, + { url = "https://files.pythonhosted.org/packages/1a/1a/2abdc7ce3b56429ae39d6bfb48d8c791f5a26bbcb6f44aabcf71ffc3fda2/websockets-13.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4b889dbd1342820cc210ba44307cf75ae5f2f96226c0038094455a96e64fb07a", size = 155714, upload-time = "2024-09-21T17:33:43.128Z" }, + { url = "https://files.pythonhosted.org/packages/2a/98/189d7cf232753a719b2726ec55e7922522632248d5d830adf078e3f612be/websockets-13.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:586a356928692c1fed0eca68b4d1c2cbbd1ca2acf2ac7e7ebd3b9052582deefa", size = 164587, upload-time = "2024-09-21T17:33:44.27Z" }, + { url = "https://files.pythonhosted.org/packages/a5/2b/fb77cedf3f9f55ef8605238c801eef6b9a5269b01a396875a86896aea3a6/websockets-13.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7bd6abf1e070a6b72bfeb71049d6ad286852e285f146682bf30d0296f5fbadfa", size = 163588, upload-time = "2024-09-21T17:33:45.38Z" }, + { url = "https://files.pythonhosted.org/packages/a3/b7/070481b83d2d5ac0f19233d9f364294e224e6478b0762f07fa7f060e0619/websockets-13.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2aad13a200e5934f5a6767492fb07151e1de1d6079c003ab31e1823733ae79", size = 163894, upload-time = "2024-09-21T17:33:46.651Z" }, + { url = "https://files.pythonhosted.org/packages/eb/be/d6e1cff7d441cfe5eafaacc5935463e5f14c8b1c0d39cb8afde82709b55a/websockets-13.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:df01aea34b6e9e33572c35cd16bae5a47785e7d5c8cb2b54b2acdb9678315a17", size = 164315, upload-time = "2024-09-21T17:33:48.432Z" }, + { 
url = "https://files.pythonhosted.org/packages/8b/5e/ffa234473e46ab2d3f9fd9858163d5db3ecea1439e4cb52966d78906424b/websockets-13.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e54affdeb21026329fb0744ad187cf812f7d3c2aa702a5edb562b325191fcab6", size = 163714, upload-time = "2024-09-21T17:33:49.548Z" }, + { url = "https://files.pythonhosted.org/packages/cc/92/cea9eb9d381ca57065a5eb4ec2ce7a291bd96c85ce742915c3c9ffc1069f/websockets-13.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9ef8aa8bdbac47f4968a5d66462a2a0935d044bf35c0e5a8af152d58516dbeb5", size = 163673, upload-time = "2024-09-21T17:33:51.056Z" }, + { url = "https://files.pythonhosted.org/packages/a4/f1/279104fff239bfd04c12b1e58afea227d72fd1acf431e3eed3f6ac2c96d2/websockets-13.1-cp39-cp39-win32.whl", hash = "sha256:deeb929efe52bed518f6eb2ddc00cc496366a14c726005726ad62c2dd9017a3c", size = 158702, upload-time = "2024-09-21T17:33:52.584Z" }, + { url = "https://files.pythonhosted.org/packages/25/0b/b87370ff141375c41f7dd67941728e4b3682ebb45882591516c792a2ebee/websockets-13.1-cp39-cp39-win_amd64.whl", hash = "sha256:7c65ffa900e7cc958cd088b9a9157a8141c991f8c53d11087e6fb7277a03f81d", size = 159146, upload-time = "2024-09-21T17:33:53.781Z" }, + { url = "https://files.pythonhosted.org/packages/2d/75/6da22cb3ad5b8c606963f9a5f9f88656256fecc29d420b4b2bf9e0c7d56f/websockets-13.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5dd6da9bec02735931fccec99d97c29f47cc61f644264eb995ad6c0c27667238", size = 155499, upload-time = "2024-09-21T17:33:54.917Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ba/22833d58629088fcb2ccccedfae725ac0bbcd713319629e97125b52ac681/websockets-13.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:2510c09d8e8df777177ee3d40cd35450dc169a81e747455cc4197e63f7e7bfe5", size = 155737, upload-time = "2024-09-21T17:33:56.052Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/54/61684fe22bdb831e9e1843d972adadf359cf04ab8613285282baea6a24bb/websockets-13.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1c3cf67185543730888b20682fb186fc8d0fa6f07ccc3ef4390831ab4b388d9", size = 157095, upload-time = "2024-09-21T17:33:57.21Z" }, + { url = "https://files.pythonhosted.org/packages/fc/f5/6652fb82440813822022a9301a30afde85e5ff3fb2aebb77f34aabe2b4e8/websockets-13.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcc03c8b72267e97b49149e4863d57c2d77f13fae12066622dc78fe322490fe6", size = 156701, upload-time = "2024-09-21T17:33:59.061Z" }, + { url = "https://files.pythonhosted.org/packages/67/33/ae82a7b860fa8a08aba68818bdf7ff61f04598aa5ab96df4cd5a3e418ca4/websockets-13.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:004280a140f220c812e65f36944a9ca92d766b6cc4560be652a0a3883a79ed8a", size = 156654, upload-time = "2024-09-21T17:34:00.944Z" }, + { url = "https://files.pythonhosted.org/packages/63/0b/a1b528d36934f833e20f6da1032b995bf093d55cb416b9f2266f229fb237/websockets-13.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e2620453c075abeb0daa949a292e19f56de518988e079c36478bacf9546ced23", size = 159192, upload-time = "2024-09-21T17:34:02.656Z" }, + { url = "https://files.pythonhosted.org/packages/5e/a1/5ae6d0ef2e61e2b77b3b4678949a634756544186620a728799acdf5c3482/websockets-13.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9156c45750b37337f7b0b00e6248991a047be4aa44554c9886fe6bdd605aab3b", size = 155433, upload-time = "2024-09-21T17:34:03.88Z" }, + { url = "https://files.pythonhosted.org/packages/0d/2f/addd33f85600d210a445f817ff0d79d2b4d0eb6f3c95b9f35531ebf8f57c/websockets-13.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:80c421e07973a89fbdd93e6f2003c17d20b69010458d3a8e37fb47874bd67d51", size = 155733, upload-time 
= "2024-09-21T17:34:05.173Z" }, + { url = "https://files.pythonhosted.org/packages/74/0b/f8ec74ac3b14a983289a1b42dc2c518a0e2030b486d0549d4f51ca11e7c9/websockets-13.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82d0ba76371769d6a4e56f7e83bb8e81846d17a6190971e38b5de108bde9b0d7", size = 157093, upload-time = "2024-09-21T17:34:06.398Z" }, + { url = "https://files.pythonhosted.org/packages/ad/4c/aa5cc2f718ee4d797411202f332c8281f04c42d15f55b02f7713320f7a03/websockets-13.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9875a0143f07d74dc5e1ded1c4581f0d9f7ab86c78994e2ed9e95050073c94d", size = 156701, upload-time = "2024-09-21T17:34:07.582Z" }, + { url = "https://files.pythonhosted.org/packages/1f/4b/7c5b2d0d0f0f1a54f27c60107cf1f201bee1f88c5508f87408b470d09a9c/websockets-13.1-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a11e38ad8922c7961447f35c7b17bffa15de4d17c70abd07bfbe12d6faa3e027", size = 156648, upload-time = "2024-09-21T17:34:08.734Z" }, + { url = "https://files.pythonhosted.org/packages/f3/63/35f3fb073884a9fd1ce5413b2dcdf0d9198b03dac6274197111259cbde06/websockets-13.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4059f790b6ae8768471cddb65d3c4fe4792b0ab48e154c9f0a04cefaabcd5978", size = 159188, upload-time = "2024-09-21T17:34:10.018Z" }, + { url = "https://files.pythonhosted.org/packages/59/fd/e4bf9a7159dba6a16c59ae9e670e3e8ad9dcb6791bc0599eb86de32d50a9/websockets-13.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:25c35bf84bf7c7369d247f0b8cfa157f989862c49104c5cf85cb5436a641d93e", size = 155499, upload-time = "2024-09-21T17:34:11.3Z" }, + { url = "https://files.pythonhosted.org/packages/74/42/d48ede93cfe0c343f3b552af08efc60778d234989227b16882eed1b8b189/websockets-13.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:83f91d8a9bb404b8c2c41a707ac7f7f75b9442a0a876df295de27251a856ad09", size = 155731, upload-time = "2024-09-21T17:34:13.151Z" }, + { url = "https://files.pythonhosted.org/packages/f6/f2/2ef6bff1c90a43b80622a17c0852b48c09d3954ab169266ad7b15e17cdcb/websockets-13.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a43cfdcddd07f4ca2b1afb459824dd3c6d53a51410636a2c7fc97b9a8cf4842", size = 157093, upload-time = "2024-09-21T17:34:14.52Z" }, + { url = "https://files.pythonhosted.org/packages/d1/14/6f20bbaeeb350f155edf599aad949c554216f90e5d4ae7373d1f2e5931fb/websockets-13.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48a2ef1381632a2f0cb4efeff34efa97901c9fbc118e01951ad7cfc10601a9bb", size = 156701, upload-time = "2024-09-21T17:34:15.692Z" }, + { url = "https://files.pythonhosted.org/packages/c7/86/38279dfefecd035e22b79c38722d4f87c4b6196f1556b7a631d0a3095ca7/websockets-13.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:459bf774c754c35dbb487360b12c5727adab887f1622b8aed5755880a21c4a20", size = 156649, upload-time = "2024-09-21T17:34:17.335Z" }, + { url = "https://files.pythonhosted.org/packages/f6/c5/12c6859a2eaa8c53f59a647617a27f1835a226cd7106c601067c53251d98/websockets-13.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:95858ca14a9f6fa8413d29e0a585b31b278388aa775b8a81fa24830123874678", size = 159187, upload-time = "2024-09-21T17:34:18.538Z" }, + { url = "https://files.pythonhosted.org/packages/56/27/96a5cd2626d11c8280656c6c71d8ab50fe006490ef9971ccd154e0c42cd2/websockets-13.1-py3-none-any.whl", hash = "sha256:a9a396a6ad26130cdae92ae10c36af09d9bfe6cafe69670fd3b6da9b07b4044f", size = 152134, upload-time = "2024-09-21T17:34:19.904Z" }, +] + +[[package]] +name = "websockets" +version = "15.0.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", 
+ "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016, upload-time = "2025-03-05T20:03:41.606Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/da/6462a9f510c0c49837bbc9345aca92d767a56c1fb2939e1579df1e1cdcf7/websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b", size = 175423, upload-time = "2025-03-05T20:01:35.363Z" }, + { url = "https://files.pythonhosted.org/packages/1c/9f/9d11c1a4eb046a9e106483b9ff69bce7ac880443f00e5ce64261b47b07e7/websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205", size = 173080, upload-time = "2025-03-05T20:01:37.304Z" }, + { url = "https://files.pythonhosted.org/packages/d5/4f/b462242432d93ea45f297b6179c7333dd0402b855a912a04e7fc61c0d71f/websockets-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a", size = 173329, upload-time = "2025-03-05T20:01:39.668Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0c/6afa1f4644d7ed50284ac59cc70ef8abd44ccf7d45850d989ea7310538d0/websockets-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e", size = 182312, upload-time = "2025-03-05T20:01:41.815Z" }, + { url = "https://files.pythonhosted.org/packages/dd/d4/ffc8bd1350b229ca7a4db2a3e1c482cf87cea1baccd0ef3e72bc720caeec/websockets-15.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf", size = 181319, upload-time = "2025-03-05T20:01:43.967Z" }, + { url = "https://files.pythonhosted.org/packages/97/3a/5323a6bb94917af13bbb34009fac01e55c51dfde354f63692bf2533ffbc2/websockets-15.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb", size = 181631, upload-time = "2025-03-05T20:01:46.104Z" }, + { url = "https://files.pythonhosted.org/packages/a6/cc/1aeb0f7cee59ef065724041bb7ed667b6ab1eeffe5141696cccec2687b66/websockets-15.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d", size = 182016, upload-time = "2025-03-05T20:01:47.603Z" }, + { url = "https://files.pythonhosted.org/packages/79/f9/c86f8f7af208e4161a7f7e02774e9d0a81c632ae76db2ff22549e1718a51/websockets-15.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9", size = 181426, upload-time = "2025-03-05T20:01:48.949Z" }, + { url = "https://files.pythonhosted.org/packages/c7/b9/828b0bc6753db905b91df6ae477c0b14a141090df64fb17f8a9d7e3516cf/websockets-15.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c", size = 181360, upload-time = "2025-03-05T20:01:50.938Z" }, + { url = "https://files.pythonhosted.org/packages/89/fb/250f5533ec468ba6327055b7d98b9df056fb1ce623b8b6aaafb30b55d02e/websockets-15.0.1-cp310-cp310-win32.whl", hash = "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256", size = 176388, upload-time = "2025-03-05T20:01:52.213Z" }, + { url = "https://files.pythonhosted.org/packages/1c/46/aca7082012768bb98e5608f01658ff3ac8437e563eca41cf068bd5849a5e/websockets-15.0.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41", size = 176830, upload-time = "2025-03-05T20:01:53.922Z" }, + { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423, upload-time = "2025-03-05T20:01:56.276Z" }, + { url = "https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082, upload-time = "2025-03-05T20:01:57.563Z" }, + { url = "https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330, upload-time = "2025-03-05T20:01:59.063Z" }, + { url = "https://files.pythonhosted.org/packages/a5/90/1c37ae8b8a113d3daf1065222b6af61cc44102da95388ac0018fcb7d93d9/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562", size = 182878, upload-time = "2025-03-05T20:02:00.305Z" }, + { url = "https://files.pythonhosted.org/packages/8e/8d/96e8e288b2a41dffafb78e8904ea7367ee4f891dafc2ab8d87e2124cb3d3/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792", size = 181883, upload-time = "2025-03-05T20:02:03.148Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/1f/5d6dbf551766308f6f50f8baf8e9860be6182911e8106da7a7f73785f4c4/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413", size = 182252, upload-time = "2025-03-05T20:02:05.29Z" }, + { url = "https://files.pythonhosted.org/packages/d4/78/2d4fed9123e6620cbf1706c0de8a1632e1a28e7774d94346d7de1bba2ca3/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8", size = 182521, upload-time = "2025-03-05T20:02:07.458Z" }, + { url = "https://files.pythonhosted.org/packages/e7/3b/66d4c1b444dd1a9823c4a81f50231b921bab54eee2f69e70319b4e21f1ca/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3", size = 181958, upload-time = "2025-03-05T20:02:09.842Z" }, + { url = "https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918, upload-time = "2025-03-05T20:02:11.968Z" }, + { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388, upload-time = "2025-03-05T20:02:13.32Z" }, + { url = "https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828, upload-time = "2025-03-05T20:02:14.585Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437, upload-time = "2025-03-05T20:02:16.706Z" }, + { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096, upload-time = "2025-03-05T20:02:18.832Z" }, + { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332, upload-time = "2025-03-05T20:02:20.187Z" }, + { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152, upload-time = "2025-03-05T20:02:22.286Z" }, + { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096, upload-time = "2025-03-05T20:02:24.368Z" }, + { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523, upload-time = 
"2025-03-05T20:02:25.669Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790, upload-time = "2025-03-05T20:02:26.99Z" }, + { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165, upload-time = "2025-03-05T20:02:30.291Z" }, + { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160, upload-time = "2025-03-05T20:02:31.634Z" }, + { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395, upload-time = "2025-03-05T20:02:33.017Z" }, + { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841, upload-time = "2025-03-05T20:02:34.498Z" }, + { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440, upload-time = "2025-03-05T20:02:36.695Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098, upload-time = "2025-03-05T20:02:37.985Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329, upload-time = "2025-03-05T20:02:39.298Z" }, + { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111, upload-time = "2025-03-05T20:02:40.595Z" }, + { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054, upload-time = "2025-03-05T20:02:41.926Z" }, + { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496, upload-time = "2025-03-05T20:02:43.304Z" }, + { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829, upload-time = 
"2025-03-05T20:02:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217, upload-time = "2025-03-05T20:02:50.14Z" }, + { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195, upload-time = "2025-03-05T20:02:51.561Z" }, + { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393, upload-time = "2025-03-05T20:02:53.814Z" }, + { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837, upload-time = "2025-03-05T20:02:55.237Z" }, + { url = "https://files.pythonhosted.org/packages/36/db/3fff0bcbe339a6fa6a3b9e3fbc2bfb321ec2f4cd233692272c5a8d6cf801/websockets-15.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5f4c04ead5aed67c8a1a20491d54cdfba5884507a48dd798ecaf13c74c4489f5", size = 175424, upload-time = "2025-03-05T20:02:56.505Z" }, + { url = "https://files.pythonhosted.org/packages/46/e6/519054c2f477def4165b0ec060ad664ed174e140b0d1cbb9fafa4a54f6db/websockets-15.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:abdc0c6c8c648b4805c5eacd131910d2a7f6455dfd3becab248ef108e89ab16a", size = 173077, upload-time = "2025-03-05T20:02:58.37Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/21/c0712e382df64c93a0d16449ecbf87b647163485ca1cc3f6cbadb36d2b03/websockets-15.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a625e06551975f4b7ea7102bc43895b90742746797e2e14b70ed61c43a90f09b", size = 173324, upload-time = "2025-03-05T20:02:59.773Z" }, + { url = "https://files.pythonhosted.org/packages/1c/cb/51ba82e59b3a664df54beed8ad95517c1b4dc1a913730e7a7db778f21291/websockets-15.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d591f8de75824cbb7acad4e05d2d710484f15f29d4a915092675ad3456f11770", size = 182094, upload-time = "2025-03-05T20:03:01.827Z" }, + { url = "https://files.pythonhosted.org/packages/fb/0f/bf3788c03fec679bcdaef787518dbe60d12fe5615a544a6d4cf82f045193/websockets-15.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47819cea040f31d670cc8d324bb6435c6f133b8c7a19ec3d61634e62f8d8f9eb", size = 181094, upload-time = "2025-03-05T20:03:03.123Z" }, + { url = "https://files.pythonhosted.org/packages/5e/da/9fb8c21edbc719b66763a571afbaf206cb6d3736d28255a46fc2fe20f902/websockets-15.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac017dd64572e5c3bd01939121e4d16cf30e5d7e110a119399cf3133b63ad054", size = 181397, upload-time = "2025-03-05T20:03:04.443Z" }, + { url = "https://files.pythonhosted.org/packages/2e/65/65f379525a2719e91d9d90c38fe8b8bc62bd3c702ac651b7278609b696c4/websockets-15.0.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4a9fac8e469d04ce6c25bb2610dc535235bd4aa14996b4e6dbebf5e007eba5ee", size = 181794, upload-time = "2025-03-05T20:03:06.708Z" }, + { url = "https://files.pythonhosted.org/packages/d9/26/31ac2d08f8e9304d81a1a7ed2851c0300f636019a57cbaa91342015c72cc/websockets-15.0.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363c6f671b761efcb30608d24925a382497c12c506b51661883c3e22337265ed", size = 181194, upload-time = 
"2025-03-05T20:03:08.844Z" }, + { url = "https://files.pythonhosted.org/packages/98/72/1090de20d6c91994cd4b357c3f75a4f25ee231b63e03adea89671cc12a3f/websockets-15.0.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2034693ad3097d5355bfdacfffcbd3ef5694f9718ab7f29c29689a9eae841880", size = 181164, upload-time = "2025-03-05T20:03:10.242Z" }, + { url = "https://files.pythonhosted.org/packages/2d/37/098f2e1c103ae8ed79b0e77f08d83b0ec0b241cf4b7f2f10edd0126472e1/websockets-15.0.1-cp39-cp39-win32.whl", hash = "sha256:3b1ac0d3e594bf121308112697cf4b32be538fb1444468fb0a6ae4feebc83411", size = 176381, upload-time = "2025-03-05T20:03:12.77Z" }, + { url = "https://files.pythonhosted.org/packages/75/8b/a32978a3ab42cebb2ebdd5b05df0696a09f4d436ce69def11893afa301f0/websockets-15.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:b7643a03db5c95c799b89b31c036d5f27eeb4d259c798e878d6937d71832b1e4", size = 176841, upload-time = "2025-03-05T20:03:14.367Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/d40f779fa16f74d3468357197af8d6ad07e7c5a27ea1ca74ceb38986f77a/websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3", size = 173109, upload-time = "2025-03-05T20:03:17.769Z" }, + { url = "https://files.pythonhosted.org/packages/bc/cd/5b887b8585a593073fd92f7c23ecd3985cd2c3175025a91b0d69b0551372/websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1", size = 173343, upload-time = "2025-03-05T20:03:19.094Z" }, + { url = "https://files.pythonhosted.org/packages/fe/ae/d34f7556890341e900a95acf4886833646306269f899d58ad62f588bf410/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475", size = 174599, upload-time = "2025-03-05T20:03:21.1Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/e6/5fd43993a87db364ec60fc1d608273a1a465c0caba69176dd160e197ce42/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9", size = 174207, upload-time = "2025-03-05T20:03:23.221Z" }, + { url = "https://files.pythonhosted.org/packages/2b/fb/c492d6daa5ec067c2988ac80c61359ace5c4c674c532985ac5a123436cec/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04", size = 174155, upload-time = "2025-03-05T20:03:25.321Z" }, + { url = "https://files.pythonhosted.org/packages/68/a1/dcb68430b1d00b698ae7a7e0194433bce4f07ded185f0ee5fb21e2a2e91e/websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122", size = 176884, upload-time = "2025-03-05T20:03:27.934Z" }, + { url = "https://files.pythonhosted.org/packages/b7/48/4b67623bac4d79beb3a6bb27b803ba75c1bdedc06bd827e465803690a4b2/websockets-15.0.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7f493881579c90fc262d9cdbaa05a6b54b3811c2f300766748db79f098db9940", size = 173106, upload-time = "2025-03-05T20:03:29.404Z" }, + { url = "https://files.pythonhosted.org/packages/ed/f0/adb07514a49fe5728192764e04295be78859e4a537ab8fcc518a3dbb3281/websockets-15.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:47b099e1f4fbc95b701b6e85768e1fcdaf1630f3cbe4765fa216596f12310e2e", size = 173339, upload-time = "2025-03-05T20:03:30.755Z" }, + { url = "https://files.pythonhosted.org/packages/87/28/bd23c6344b18fb43df40d0700f6d3fffcd7cef14a6995b4f976978b52e62/websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67f2b6de947f8c757db2db9c71527933ad0019737ec374a8a6be9a956786aaf9", size = 
174597, upload-time = "2025-03-05T20:03:32.247Z" }, + { url = "https://files.pythonhosted.org/packages/6d/79/ca288495863d0f23a60f546f0905ae8f3ed467ad87f8b6aceb65f4c013e4/websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d08eb4c2b7d6c41da6ca0600c077e93f5adcfd979cd777d747e9ee624556da4b", size = 174205, upload-time = "2025-03-05T20:03:33.731Z" }, + { url = "https://files.pythonhosted.org/packages/04/e4/120ff3180b0872b1fe6637f6f995bcb009fb5c87d597c1fc21456f50c848/websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b826973a4a2ae47ba357e4e82fa44a463b8f168e1ca775ac64521442b19e87f", size = 174150, upload-time = "2025-03-05T20:03:35.757Z" }, + { url = "https://files.pythonhosted.org/packages/cb/c3/30e2f9c539b8da8b1d76f64012f3b19253271a63413b2d3adb94b143407f/websockets-15.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:21c1fa28a6a7e3cbdc171c694398b6df4744613ce9b36b1a498e816787e28123", size = 176877, upload-time = "2025-03-05T20:03:37.199Z" }, + { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" }, +] + +[[package]] +name = "win32-setctime" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867, upload-time = "2024-12-07T15:28:28.314Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = 
"sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" }, +] diff --git a/service-secrets.yaml b/service-secrets.yaml index 19f6f94..a5c926b 100644 --- a/service-secrets.yaml +++ b/service-secrets.yaml @@ -92,9 +92,16 @@ spec: value: "500" - name: KERNEL_IMAGES_API_OUTPUT_DIR value: "/tmp/recordings" - # Force new revision + # Eval-Server configuration + - name: EVAL_SERVER_WS_PORT + value: "8082" + - name: EVAL_SERVER_HTTP_PORT + value: "8083" + - name: EVAL_SERVER_HOST + value: "127.0.0.1" + # Force new revision - name: DEPLOYMENT_VERSION - value: "v13-tcp-only-webrtc" + value: "v16-eval-server-with-nodejs" traffic: - percent: 100 latestRevision: true \ No newline at end of file diff --git a/service.yaml b/service.yaml index c78c45b..22204e9 100644 --- a/service.yaml +++ b/service.yaml @@ -82,9 +82,16 @@ spec: value: "500" - name: KERNEL_IMAGES_API_OUTPUT_DIR value: "/tmp/recordings" - # Force new revision + # Eval-Server configuration + - name: EVAL_SERVER_WS_PORT + value: "8082" + - name: EVAL_SERVER_HTTP_PORT + value: "8083" + - name: EVAL_SERVER_HOST + value: "127.0.0.1" + # Force new revision - name: DEPLOYMENT_VERSION - value: "v13-tcp-only-fallback" + value: "v16-eval-server-with-nodejs" traffic: - percent: 100 latestRevision: true \ No newline at end of file diff --git a/supervisor/services-cloudrun/eval-server.conf b/supervisor/services-cloudrun/eval-server.conf new file mode 100644 index 0000000..b605e98 --- /dev/null +++ b/supervisor/services-cloudrun/eval-server.conf @@ -0,0 +1,12 @@ +[program:eval-server] +command=/usr/bin/node /opt/eval-server/start-cloudrun.js +directory=/opt/eval-server +autostart=true +autorestart=true +startsecs=5 +priority=25 +stdout_logfile=/var/log/supervisord/eval-server/eval-server.log +stdout_logfile_maxbytes=50MB +redirect_stderr=true 
+environment=HOME="/home/kernel",USER="kernel",NODE_ENV="production",EVAL_SERVER_WS_PORT="8082",EVAL_SERVER_HTTP_PORT="8083",EVAL_SERVER_HOST="127.0.0.1" +user=kernel diff --git a/test-eval-server.sh b/test-eval-server.sh new file mode 100755 index 0000000..a7d9569 --- /dev/null +++ b/test-eval-server.sh @@ -0,0 +1,30 @@ +#!/bin/bash +set -e + +echo "๐Ÿงช Testing eval-server startup script..." + +# Build only the eval-server stage +echo "๐Ÿ“ฆ Building eval-server stage..." +docker build \ + --file Dockerfile.cloudrun \ + --target eval-server-builder \ + -t eval-server-test \ + . + +echo "โœ… Build successful!" +echo "" +echo "๐Ÿ“‚ Contents of /eval-server:" +docker run --rm eval-server-test ls -la /eval-server + +echo "" +echo "๐Ÿ“„ Checking package.json:" +docker run --rm eval-server-test cat /eval-server/package.json | grep '"type"' + +echo "" +echo "๐Ÿ” Checking if node_modules exist:" +docker run --rm eval-server-test ls -la /eval-server/node_modules | head -5 + +echo "" +echo "โœ… All checks passed! Eval-server build is working." 
+echo "" +echo "Next: Test the full image with 'docker build -f Dockerfile.cloudrun -t kernel-browser:cloudrun-test .'" From e6ed2284a004ad35beca4f0c21c890e0315ab391 Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Sun, 5 Oct 2025 14:54:16 -0500 Subject: [PATCH 22/25] Enable AUTOMATED_MODE in DevTools build for eval-server auto-connect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ๐Ÿ“ Changes: - Modified Dockerfile.cloudrun to set AUTOMATED_MODE: true in BuildConfig.ts before building DevTools - This ensures DevTools automatically enables evaluation mode and connects to ws://localhost:8082 ๐Ÿค– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- Dockerfile.cloudrun | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Dockerfile.cloudrun b/Dockerfile.cloudrun index e525eed..9c15eb1 100644 --- a/Dockerfile.cloudrun +++ b/Dockerfile.cloudrun @@ -49,7 +49,10 @@ RUN git remote add upstream https://github.com/BrowserOperator/browser-operator- RUN git fetch upstream RUN git checkout upstream/main -# Build Browser Operator version +# Enable AUTOMATED_MODE for Docker deployment +RUN sed -i 's/AUTOMATED_MODE: false/AUTOMATED_MODE: true/' front_end/panels/ai_chat/core/BuildConfig.ts + +# Build Browser Operator version with AUTOMATED_MODE enabled RUN npm run build # Eval-Server build stage From 612f5aa11046788f541aaeb5b8fd574f6ab19200 Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Sun, 5 Oct 2025 16:38:35 -0500 Subject: [PATCH 23/25] Add --auto-open-devtools-for-tabs to auto-open DevTools panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ๐Ÿ“ Changes: - Added --auto-open-devtools-for-tabs flag to CHROMIUM_FLAGS - Updated cloudrun-wrapper.sh, service.yaml, and service-secrets.yaml - This ensures DevTools opens automatically when browser starts, allowing immediate connection to eval-server ๐Ÿค– Generated with [Claude 
Code](https://claude.com/claude-code) Co-Authored-By: Claude --- cloudrun-wrapper.sh | 2 +- service-secrets.yaml | 2 +- service.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cloudrun-wrapper.sh b/cloudrun-wrapper.sh index 34a9428..67cdcb1 100644 --- a/cloudrun-wrapper.sh +++ b/cloudrun-wrapper.sh @@ -29,7 +29,7 @@ fi # Port configuration for Cloud Run export PORT=${PORT:-8080} -export CHROMIUM_FLAGS="${CHROMIUM_FLAGS:---user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor --custom-devtools-frontend=http://localhost:8001/ https://www.google.com}" +export CHROMIUM_FLAGS="${CHROMIUM_FLAGS:---user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor --custom-devtools-frontend=http://localhost:8001/ --auto-open-devtools-for-tabs https://www.google.com}" # Setup directories with proper permissions mkdir -p /tmp/nginx_client_temp /tmp/nginx_proxy_temp /tmp/nginx_fastcgi_temp \ diff --git a/service-secrets.yaml b/service-secrets.yaml index a5c926b..bd585f0 100644 --- a/service-secrets.yaml +++ b/service-secrets.yaml @@ -50,7 +50,7 @@ spec: value: "false" # Chrome optimizations for Cloud Run - name: CHROMIUM_FLAGS - value: "--user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor" + value: "--user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor 
--custom-devtools-frontend=http://localhost:8001/ --auto-open-devtools-for-tabs" # Display configuration - name: DISPLAY_NUM value: "1" diff --git a/service.yaml b/service.yaml index 22204e9..9a19969 100644 --- a/service.yaml +++ b/service.yaml @@ -50,7 +50,7 @@ spec: value: "false" # Chrome optimizations for Cloud Run - name: CHROMIUM_FLAGS - value: "--user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor" + value: "--user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor --custom-devtools-frontend=http://localhost:8001/ --auto-open-devtools-for-tabs" # Display configuration - name: DISPLAY_NUM value: "1" From d7aff66afde3f287d11c89301fad27819cc84a44 Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Sun, 5 Oct 2025 23:10:07 -0500 Subject: [PATCH 24/25] Add python3 to final image for devtools-frontend HTTP server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ๐Ÿ“ Changes: - Added python3 and python-is-python3 to runtime dependencies in Dockerfile.cloudrun - Required for supervisor devtools-frontend service which runs: python3 -m http.server 8001 - This will allow the custom DevTools frontend to be served properly ๐Ÿค– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- Dockerfile.cloudrun | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.cloudrun b/Dockerfile.cloudrun index 9c15eb1..c66acf6 100644 --- a/Dockerfile.cloudrun +++ b/Dockerfile.cloudrun @@ -206,7 +206,7 @@ ENV USERNAME=kernel RUN set -eux; \ apt-get update; \ apt-get install -y --no-install-recommends \ - wget ca-certificates python2 supervisor xclip xdotool \ + 
wget ca-certificates python2 python3 python-is-python3 supervisor xclip xdotool \ pulseaudio dbus-x11 xserver-xorg-video-dummy \ libcairo2 libxcb1 libxrandr2 libxv1 libopus0 libvpx7 \ gstreamer1.0-plugins-base gstreamer1.0-plugins-good \ From 3e81cf8a0d7d88642267fcbaa2714e1c3e0d616f Mon Sep 17 00:00:00 2001 From: Oleh Luchkiv Date: Mon, 6 Oct 2025 16:09:43 -0500 Subject: [PATCH 25/25] Add --auto-open-devtools-for-tabs to supervisor chromium.conf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ๐Ÿ“ Changes: - Added --auto-open-devtools-for-tabs flag to hardcoded command in chromium.conf - This will automatically open DevTools when browser starts ๐Ÿค– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- supervisor/services-cloudrun/chromium.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervisor/services-cloudrun/chromium.conf b/supervisor/services-cloudrun/chromium.conf index d8413f5..ae71cf1 100644 --- a/supervisor/services-cloudrun/chromium.conf +++ b/supervisor/services-cloudrun/chromium.conf @@ -1,5 +1,5 @@ [program:chromium] -command=/bin/bash -lc 'sleep 3 && DISPLAY=":1" DBUS_SESSION_BUS_ADDRESS="unix:path=/tmp/dbus/session_bus_socket" chromium --remote-debugging-port=9223 --remote-allow-origins=* --user-data-dir=/home/kernel/user-data --password-store=basic --no-first-run --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --no-sandbox --disable-setuid-sandbox --disable-features=VizDisplayCompositor --custom-devtools-frontend=http://localhost:8001/' +command=/bin/bash -lc 'sleep 3 && DISPLAY=":1" DBUS_SESSION_BUS_ADDRESS="unix:path=/tmp/dbus/session_bus_socket" chromium --remote-debugging-port=9223 --remote-allow-origins=* --user-data-dir=/home/kernel/user-data --password-store=basic --no-first-run --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --no-sandbox --disable-setuid-sandbox 
--disable-features=VizDisplayCompositor --custom-devtools-frontend=http://localhost:8001/ --auto-open-devtools-for-tabs' autostart=true autorestart=true startsecs=8