Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@ target/
*.iml

# Misc
CLUADE.md
CLAUDE.md
.claude/
36 changes: 36 additions & 0 deletions agenteval-judge/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the agenteval-judge module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<!-- Inherits shared build settings from the agenteval-parent POM. -->
<parent>
<groupId>com.agenteval</groupId>
<artifactId>agenteval-parent</artifactId>
<version>0.1.0-SNAPSHOT</version>
</parent>

<artifactId>agenteval-judge</artifactId>
<name>AgentEval Judge</name>
<description>LLM-as-judge engine with OpenAI and Anthropic provider integrations</description>

<!--
No dependency below declares a version; presumably they are pinned by the
parent's dependencyManagement section (TODO confirm against agenteval-parent).
-->
<dependencies>
<!-- Sibling module from the same groupId. -->
<dependency>
<groupId>com.agenteval</groupId>
<artifactId>agenteval-core</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<!-- Logging API only; no SLF4J binding is declared in this module. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<!-- Test-scoped: not part of the published runtime classpath. -->
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.agenteval.judge;

/**
 * Root of the judge module's exception hierarchy.
 *
 * <p>Extends {@link RuntimeException}, so callers are not forced to declare or catch it.</p>
 */
public class JudgeException extends RuntimeException {

    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception that wraps an underlying failure.
     *
     * @param message human-readable description of the failure
     * @param cause   the underlying throwable, retained as the exception's cause
     */
    public JudgeException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception with a description only.
     *
     * @param message human-readable description of the failure
     */
    public JudgeException(String message) {
        super(message);
    }
}
79 changes: 79 additions & 0 deletions agenteval-judge/src/main/java/com/agenteval/judge/JudgeModels.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package com.agenteval.judge;

import com.agenteval.core.judge.JudgeModel;
import com.agenteval.judge.config.JudgeConfig;
import com.agenteval.judge.provider.AnthropicJudgeModel;
import com.agenteval.judge.provider.OpenAiJudgeModel;

/**
 * Factory entry points for the supported judge model providers.
 *
 * <p>The {@code String} overloads read the API key from the provider's environment
 * variable and fail immediately with a {@link JudgeException} when it is unset or blank.
 * The {@link JudgeConfig} overloads pass the caller's configuration through unchanged.</p>
 *
 * <pre>{@code
 * var judge = JudgeModels.openai("gpt-4o");
 * var judge = JudgeModels.anthropic("claude-sonnet-4-20250514");
 * var judge = JudgeModels.openai(JudgeConfig.builder()
 *     .apiKey("sk-...")
 *     .model("gpt-4o")
 *     .baseUrl("https://api.openai.com")
 *     .build());
 * }</pre>
 */
public final class JudgeModels {

    private static final String OPENAI_API_KEY_ENV = "OPENAI_API_KEY";
    private static final String ANTHROPIC_API_KEY_ENV = "ANTHROPIC_API_KEY";
    private static final String OPENAI_BASE_URL = "https://api.openai.com";
    private static final String ANTHROPIC_BASE_URL = "https://api.anthropic.com";

    /** Not instantiable: this class only exposes static factories. */
    private JudgeModels() {
    }

    /**
     * Builds an OpenAI judge for {@code model}, taking the key from {@code OPENAI_API_KEY}
     * and using the default OpenAI base URL.
     *
     * @param model the model identifier to use
     * @return a judge model backed by {@link OpenAiJudgeModel}
     * @throws JudgeException if the environment variable is unset or blank
     */
    public static JudgeModel openai(String model) {
        JudgeConfig config =
                envBackedConfig(model, OPENAI_API_KEY_ENV, "OpenAI", OPENAI_BASE_URL);
        return openai(config);
    }

    /**
     * Builds an OpenAI judge from a caller-supplied configuration.
     *
     * @param config the complete provider configuration
     * @return a judge model backed by {@link OpenAiJudgeModel}
     */
    public static JudgeModel openai(JudgeConfig config) {
        return new OpenAiJudgeModel(config);
    }

    /**
     * Builds an Anthropic judge for {@code model}, taking the key from
     * {@code ANTHROPIC_API_KEY} and using the default Anthropic base URL.
     *
     * @param model the model identifier to use
     * @return a judge model backed by {@link AnthropicJudgeModel}
     * @throws JudgeException if the environment variable is unset or blank
     */
    public static JudgeModel anthropic(String model) {
        JudgeConfig config =
                envBackedConfig(model, ANTHROPIC_API_KEY_ENV, "Anthropic", ANTHROPIC_BASE_URL);
        return anthropic(config);
    }

    /**
     * Builds an Anthropic judge from a caller-supplied configuration.
     *
     * @param config the complete provider configuration
     * @return a judge model backed by {@link AnthropicJudgeModel}
     */
    public static JudgeModel anthropic(JudgeConfig config) {
        return new AnthropicJudgeModel(config);
    }

    /** Assembles a configuration whose API key comes from the named environment variable. */
    private static JudgeConfig envBackedConfig(
            String model, String envVar, String providerName, String baseUrl) {
        return JudgeConfig.builder()
                .apiKey(resolveApiKey(envVar, providerName))
                .model(model)
                .baseUrl(baseUrl)
                .build();
    }

    /** Returns the value of {@code envVar}, or throws when it is unset or blank. */
    private static String resolveApiKey(String envVar, String providerName) {
        String candidate = System.getenv(envVar);
        boolean usable = candidate != null && !candidate.isBlank();
        if (usable) {
            return candidate;
        }
        String problem = providerName + " API key not found. Set the " + envVar
                + " environment variable or provide it via JudgeConfig.builder().apiKey()";
        throw new JudgeException(problem);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.agenteval.judge;

import java.time.Duration;
import java.util.Optional;

/**
 * Signals that the judge LLM answered with a 429 rate limit response.
 *
 * <p>Optionally carries the retry delay that was supplied when the exception was created.</p>
 */
public class JudgeRateLimitException extends JudgeException {

    private static final long serialVersionUID = 1L;

    /** Retry delay given at construction; {@code null} when none was provided. */
    private final Duration retryAfter;

    /**
     * Creates a rate limit exception with an optional retry delay.
     *
     * @param message    human-readable description of the failure
     * @param retryAfter the retry delay, or {@code null} if unknown
     */
    public JudgeRateLimitException(String message, Duration retryAfter) {
        super(message);
        this.retryAfter = retryAfter;
    }

    /**
     * Creates a rate limit exception without a retry delay.
     *
     * @param message human-readable description of the failure
     */
    public JudgeRateLimitException(String message) {
        super(message);
        this.retryAfter = null;
    }

    /**
     * Returns the retry delay, if one was provided.
     *
     * @return the delay wrapped in an {@link Optional}, or an empty optional when absent
     */
    public Optional<Duration> getRetryAfter() {
        if (retryAfter == null) {
            return Optional.empty();
        }
        return Optional.of(retryAfter);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.agenteval.judge;

import java.time.Duration;

/**
 * Signals that a judge LLM request ran longer than the configured timeout.
 *
 * <p>The timeout value passed at construction is kept and exposed via {@link #getTimeout()}.</p>
 */
public class JudgeTimeoutException extends JudgeException {

    private static final long serialVersionUID = 1L;

    /** The timeout value supplied by whoever raised this exception. */
    private final Duration timeout;

    /**
     * Creates a timeout exception that wraps an underlying failure.
     *
     * @param message human-readable description of the failure
     * @param timeout the timeout that was exceeded
     * @param cause   the underlying throwable, retained as the exception's cause
     */
    public JudgeTimeoutException(String message, Duration timeout, Throwable cause) {
        super(message, cause);
        this.timeout = timeout;
    }

    /**
     * Creates a timeout exception without an underlying cause.
     *
     * @param message human-readable description of the failure
     * @param timeout the timeout that was exceeded
     */
    public JudgeTimeoutException(String message, Duration timeout) {
        // Kept separate from the three-argument constructor: super(message) leaves the
        // cause uninitialized, which is not the same as passing an explicit null cause.
        super(message);
        this.timeout = timeout;
    }

    /**
     * Returns the timeout recorded at construction.
     *
     * @return the timeout value exactly as it was passed in
     */
    public Duration getTimeout() {
        return this.timeout;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package com.agenteval.judge.config;

import java.time.Duration;
import java.util.Objects;

/**
 * Immutable configuration for a judge LLM provider.
 *
 * <p>Instances are created through {@link #builder()}. {@code apiKey}, {@code model} and
 * {@code baseUrl} are required; the remaining settings default to a 60 second timeout,
 * 3 retries and a temperature of 0.0.</p>
 */
public final class JudgeConfig {

    private final String apiKey;
    private final String model;
    private final String baseUrl;
    private final Duration timeout;
    private final int maxRetries;
    private final double temperature;

    private JudgeConfig(Builder builder) {
        this.apiKey = Objects.requireNonNull(builder.apiKey, "apiKey must not be null");
        this.model = Objects.requireNonNull(builder.model, "model must not be null");
        this.baseUrl = Objects.requireNonNull(builder.baseUrl, "baseUrl must not be null");
        this.timeout = builder.timeout;
        this.maxRetries = builder.maxRetries;
        this.temperature = builder.temperature;
    }

    /**
     * Starts a new configuration.
     *
     * @return a builder pre-populated with the default timeout, retries and temperature
     */
    public static Builder builder() {
        return new Builder();
    }

    /** @return the API key; never {@code null} */
    public String getApiKey() {
        return apiKey;
    }

    /** @return the model identifier; never {@code null} */
    public String getModel() {
        return model;
    }

    /** @return the provider base URL; never {@code null} */
    public String getBaseUrl() {
        return baseUrl;
    }

    /** @return the request timeout; never {@code null} and never negative */
    public Duration getTimeout() {
        return timeout;
    }

    /** @return the maximum number of retries; never negative */
    public int getMaxRetries() {
        return maxRetries;
    }

    /** @return the sampling temperature, within {@code [0.0, 2.0]} */
    public double getTemperature() {
        return temperature;
    }

    /**
     * Mutable builder for {@link JudgeConfig}.
     *
     * <p>Range checks run in the individual setters; the required-field null checks run
     * when {@link #build()} is called.</p>
     */
    public static final class Builder {
        private String apiKey;
        private String model;
        private String baseUrl;
        private Duration timeout = Duration.ofSeconds(60);
        private int maxRetries = 3;
        private double temperature = 0.0;

        private Builder() {
        }

        /**
         * @param apiKey the provider API key (required)
         * @return this builder
         */
        public Builder apiKey(String apiKey) {
            this.apiKey = apiKey;
            return this;
        }

        /**
         * @param model the model identifier (required)
         * @return this builder
         */
        public Builder model(String model) {
            this.model = model;
            return this;
        }

        /**
         * @param baseUrl the provider base URL (required)
         * @return this builder
         */
        public Builder baseUrl(String baseUrl) {
            this.baseUrl = baseUrl;
            return this;
        }

        /**
         * Sets the request timeout.
         *
         * @param timeout the timeout; must be non-null and not negative
         * @return this builder
         * @throws NullPointerException     if {@code timeout} is {@code null}
         * @throws IllegalArgumentException if {@code timeout} is negative
         */
        public Builder timeout(Duration timeout) {
            // Reject null here so a misconfiguration fails at the call site instead of
            // surfacing later as a null returned from getTimeout().
            Objects.requireNonNull(timeout, "timeout must not be null");
            if (timeout.isNegative()) {
                throw new IllegalArgumentException(
                        "timeout must not be negative, got: " + timeout);
            }
            this.timeout = timeout;
            return this;
        }

        /**
         * Sets the maximum number of retries.
         *
         * @param maxRetries the retry count; must be zero or greater
         * @return this builder
         * @throws IllegalArgumentException if {@code maxRetries} is negative
         */
        public Builder maxRetries(int maxRetries) {
            if (maxRetries < 0) {
                throw new IllegalArgumentException("maxRetries must be non-negative");
            }
            this.maxRetries = maxRetries;
            return this;
        }

        /**
         * Sets the sampling temperature.
         *
         * @param temperature a value between 0.0 and 2.0 inclusive
         * @return this builder
         * @throws IllegalArgumentException if {@code temperature} is outside the range or NaN
         */
        public Builder temperature(double temperature) {
            // Written as a negated "in range" test so NaN, for which every ordered
            // comparison is false, is rejected rather than silently accepted.
            if (!(temperature >= 0.0 && temperature <= 2.0)) {
                throw new IllegalArgumentException(
                        "temperature must be between 0.0 and 2.0, got: " + temperature);
            }
            this.temperature = temperature;
            return this;
        }

        /**
         * Creates the configuration.
         *
         * @return a new immutable {@link JudgeConfig}
         * @throws NullPointerException if {@code apiKey}, {@code model} or {@code baseUrl}
         *                              was never set
         */
        public JudgeConfig build() {
            return new JudgeConfig(this);
        }
    }
}
Loading