26 changes: 25 additions & 1 deletion chat.go
@@ -248,13 +248,24 @@ func (r *ChatCompletionResponseFormatJSONSchema) UnmarshalJSON(data []byte) erro
return nil
}

// ChatCompletionRequestExtensions contains third-party extensions to the OpenAI API
// (e.g., vendor-specific fields implemented by vLLM).
type ChatCompletionRequestExtensions struct {
// GuidedChoice is a vLLM-specific extension that restricts the model's output
// to exactly one of the predefined string choices provided in this field,
// ensuring predictable outputs where only a fixed set of answers is valid.
GuidedChoice []string `json:"guided_choice,omitempty"`
}

// ChatCompletionRequest represents a request structure for chat completion API.
type ChatCompletionRequest struct {
Model string `json:"model"`
Messages []ChatCompletionMessage `json:"messages"`
// MaxTokens is the maximum number of tokens that can be generated in the chat completion.
// This value can be used to control costs for text generated via API.
//
// Deprecated: use MaxCompletionTokens. Not compatible with o1-series models.
// refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
MaxTokens int `json:"max_tokens,omitempty"`
// MaxCompletionTokens is an upper bound for the number of tokens that can be generated for a completion,
@@ -309,6 +320,19 @@ type ChatCompletionRequest struct {
ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"`
// Specifies the latency tier to use for processing the request.
ServiceTier ServiceTier `json:"service_tier,omitempty"`
// Verbosity determines how many output tokens are generated. Lowering the number of
// tokens reduces overall latency. It can be set to "low", "medium", or "high".
// Note: this field is only confirmed to work with gpt-5, gpt-5-mini, and gpt-5-nano.
// At the time of writing it is absent from the Chat Completions API reference,
// though the API does accept it.
Verbosity string `json:"verbosity,omitempty"`
// SafetyIdentifier is a stable identifier used to help detect users of your application
// that may be violating OpenAI's usage policies.
// The ID should be a string that uniquely identifies each user.
// We recommend hashing their username or email address to avoid sending any identifying information.
// https://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier
SafetyIdentifier string `json:"safety_identifier,omitempty"`
// Embedded struct for non-OpenAI extensions
ChatCompletionRequestExtensions
}

type StreamOptions struct {
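A minimal usage sketch for the new GuidedChoice extension. It assumes a vLLM-compatible server at http://localhost:8000/v1 and an illustrative model name, and the import path is assumed to be github.com/sashabaranov/go-openai; none of these are part of this PR.

package main

import (
	"context"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	// guided_choice is a vLLM extension, so the client must point at a
	// vLLM-compatible endpoint rather than the official OpenAI API.
	config := openai.DefaultConfig("token")
	config.BaseURL = "http://localhost:8000/v1" // assumed local vLLM server
	client := openai.NewClientWithConfig(config)

	resp, err := client.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
		Model: "meta-llama/Llama-3.1-8B-Instruct", // illustrative model name
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleUser, Content: "Is the sky blue? Answer yes or no."},
		},
		// The extensions struct is embedded in ChatCompletionRequest,
		// so it is set by its type name in the composite literal.
		ChatCompletionRequestExtensions: openai.ChatCompletionRequestExtensions{
			GuidedChoice: []string{"yes", "no"},
		},
	})
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(resp.Choices[0].Message.Content) // constrained to "yes" or "no"
}

Because the extensions live in an embedded struct rather than as loose fields, vendor-specific options stay grouped in one place while still serializing flat alongside the standard JSON fields.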
120 changes: 120 additions & 0 deletions chat_test.go
@@ -331,6 +331,126 @@ func TestO3ModelsChatCompletionsBetaLimitations(t *testing.T) {
}
}

func TestGPT5ModelsChatCompletionsBetaLimitations(t *testing.T) {
tests := []struct {
name string
in openai.ChatCompletionRequest
expectedError error
}{
{
name: "log_probs_unsupported",
in: openai.ChatCompletionRequest{
MaxCompletionTokens: 1000,
LogProbs: true,
Model: openai.GPT5,
},
expectedError: openai.ErrReasoningModelLimitationsLogprobs,
},
{
name: "set_temperature_unsupported",
in: openai.ChatCompletionRequest{
MaxCompletionTokens: 1000,
Model: openai.GPT5Mini,
Messages: []openai.ChatCompletionMessage{
{
Role: openai.ChatMessageRoleUser,
},
{
Role: openai.ChatMessageRoleAssistant,
},
},
Temperature: float32(2),
},
expectedError: openai.ErrReasoningModelLimitationsOther,
},
{
name: "set_top_unsupported",
in: openai.ChatCompletionRequest{
MaxCompletionTokens: 1000,
Model: openai.GPT5Nano,
Messages: []openai.ChatCompletionMessage{
{
Role: openai.ChatMessageRoleUser,
},
{
Role: openai.ChatMessageRoleAssistant,
},
},
Temperature: float32(1),
TopP: float32(0.1),
},
expectedError: openai.ErrReasoningModelLimitationsOther,
},
{
name: "set_n_unsupported",
in: openai.ChatCompletionRequest{
MaxCompletionTokens: 1000,
Model: openai.GPT5ChatLatest,
Messages: []openai.ChatCompletionMessage{
{
Role: openai.ChatMessageRoleUser,
},
{
Role: openai.ChatMessageRoleAssistant,
},
},
Temperature: float32(1),
TopP: float32(1),
N: 2,
},
expectedError: openai.ErrReasoningModelLimitationsOther,
},
{
name: "set_presence_penalty_unsupported",
in: openai.ChatCompletionRequest{
MaxCompletionTokens: 1000,
Model: openai.GPT5,
Messages: []openai.ChatCompletionMessage{
{
Role: openai.ChatMessageRoleUser,
},
{
Role: openai.ChatMessageRoleAssistant,
},
},
PresencePenalty: float32(0.1),
},
expectedError: openai.ErrReasoningModelLimitationsOther,
},
{
name: "set_frequency_penalty_unsupported",
in: openai.ChatCompletionRequest{
MaxCompletionTokens: 1000,
Model: openai.GPT5Mini,
Messages: []openai.ChatCompletionMessage{
{
Role: openai.ChatMessageRoleUser,
},
{
Role: openai.ChatMessageRoleAssistant,
},
},
FrequencyPenalty: float32(0.1),
},
expectedError: openai.ErrReasoningModelLimitationsOther,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
config := openai.DefaultConfig("whatever")
config.BaseURL = "http://localhost/v1"
client := openai.NewClientWithConfig(config)
ctx := context.Background()

_, err := client.CreateChatCompletion(ctx, tt.in)
checks.HasError(t, err)
msg := fmt.Sprintf("CreateChatCompletion should return a reasoning-model limitation error, returned: %s", err)
checks.ErrorIs(t, err, tt.expectedError, msg)
})
}
}

func TestChatRequestOmitEmpty(t *testing.T) {
data, err := json.Marshal(openai.ChatCompletionRequest{
// We set model b/c it's required, so omitempty doesn't make sense
8 changes: 8 additions & 0 deletions completion.go
@@ -49,6 +49,10 @@ const (
GPT4Dot1Nano20250414 = "gpt-4.1-nano-2025-04-14"
GPT4Dot5Preview = "gpt-4.5-preview"
GPT4Dot5Preview20250227 = "gpt-4.5-preview-2025-02-27"
GPT5 = "gpt-5"
GPT5Mini = "gpt-5-mini"
GPT5Nano = "gpt-5-nano"
GPT5ChatLatest = "gpt-5-chat-latest"
GPT3Dot5Turbo0125 = "gpt-3.5-turbo-0125"
GPT3Dot5Turbo1106 = "gpt-3.5-turbo-1106"
GPT3Dot5Turbo0613 = "gpt-3.5-turbo-0613"
@@ -142,6 +146,10 @@ var disabledModelsForEndpoints = map[string]map[string]bool{
GPT4Dot1Mini20250414: true,
GPT4Dot1Nano: true,
GPT4Dot1Nano20250414: true,
GPT5: true,
GPT5Mini: true,
GPT5Nano: true,
GPT5ChatLatest: true,
},
chatCompletionsSuffix: {
CodexCodeDavinci002: true,
29 changes: 29 additions & 0 deletions completion_test.go
@@ -300,3 +300,32 @@ func TestCompletionWithGPT4oModels(t *testing.T) {
})
}
}

// TestCompletionWithGPT5Models tests that GPT-5 models are not supported by the completions endpoint.
func TestCompletionWithGPT5Models(t *testing.T) {
config := openai.DefaultConfig("whatever")
config.BaseURL = "http://localhost/v1"
client := openai.NewClientWithConfig(config)

models := []string{
openai.GPT5,
openai.GPT5Mini,
openai.GPT5Nano,
openai.GPT5ChatLatest,
}

for _, model := range models {
t.Run(model, func(t *testing.T) {
_, err := client.CreateCompletion(
context.Background(),
openai.CompletionRequest{
MaxTokens: 5,
Model: model,
},
)
if !errors.Is(err, openai.ErrCompletionUnsupportedModel) {
t.Fatalf("CreateCompletion should return ErrCompletionUnsupportedModel for %s model, but returned: %v", model, err)
}
})
}
}
9 changes: 5 additions & 4 deletions reasoning_validator.go
@@ -28,21 +28,22 @@ var (
ErrReasoningModelLimitationsOther = errors.New("this model has beta-limitations, temperature, top_p and n are fixed at 1, while presence_penalty and frequency_penalty are fixed at 0") //nolint:lll
)

// ReasoningValidator handles validation for o-series model requests.
// ReasoningValidator handles validation for reasoning model requests.
type ReasoningValidator struct{}

// NewReasoningValidator creates a new validator for o-series models.
// NewReasoningValidator creates a new validator for reasoning models.
func NewReasoningValidator() *ReasoningValidator {
return &ReasoningValidator{}
}

// Validate performs all validation checks for o-series models.
// Validate performs all validation checks for reasoning models.
func (v *ReasoningValidator) Validate(request ChatCompletionRequest) error {
o1Series := strings.HasPrefix(request.Model, "o1")
o3Series := strings.HasPrefix(request.Model, "o3")
o4Series := strings.HasPrefix(request.Model, "o4")
gpt5Series := strings.HasPrefix(request.Model, "gpt-5")

if !o1Series && !o3Series && !o4Series {
if !o1Series && !o3Series && !o4Series && !gpt5Series {
return nil
}

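A short sketch of the effect of the new gpt-5 prefix check, using only identifiers that appear in this diff (the import path is again an assumption):

package main

import (
	"errors"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	validator := openai.NewReasoningValidator()

	err := validator.Validate(openai.ChatCompletionRequest{
		Model:               openai.GPT5,
		MaxCompletionTokens: 1000,
		LogProbs:            true, // not supported by reasoning models
	})
	// The "gpt-5" prefix now routes the request through the same
	// beta-limitation checks as the o1/o3/o4 series, so this prints true.
	fmt.Println(errors.Is(err, openai.ErrReasoningModelLimitationsLogprobs))
}

This mirrors the log_probs_unsupported case in TestGPT5ModelsChatCompletionsBetaLimitations above: before this PR, a gpt-5 model would have skipped validation entirely because none of the o1/o3/o4 prefixes matched.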