Skip to content

Commit

Permalink
API Updates (#10)
Browse files Browse the repository at this point in the history
* Specified Language field in audio transcription test and example

* Added speech endpoint support to audio package

* Updated audio example

* Added missing fields to chat request

* Updated common.makeRequest to handle error responses when the requested response type is a byte slice

* Added missing fields to legacy completions endpoint

* Fixed deprecated comment

* Deprecated edits package

* Added EncodingFormat to embeddings request

* Added files.MakeRetrieveContentRequestNoDisk

* Deprecated finetunes package

* Updated README to reflect deprecations

* Updated models package
  • Loading branch information
Kardbord committed Nov 12, 2023
1 parent 232ba6b commit 3d37da6
Show file tree
Hide file tree
Showing 13 changed files with 295 additions and 73 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,7 @@ examples/files/files
examples/finetunes/finetunes
examples/images/images
examples/models/models
examples/moderations/moderations
examples/moderations/moderations

# Example outputs
examples/audio/speech-creation.mp3
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ The links below lead to examples of how to use each library package.
- [x] [Audio](./audio/README.md)
- [x] [Chat](./chat/README.md)
- [x] [Completions](./completions/README.md)
- [x] [Edits](./edits/README.md)
- [x] ~~[Edits](./edits/README.md)~~ (Deprecated)
- [x] [Embeddings](./embeddings/README.md)
- [ ] Fine-Tuning
- [x] [Files](./files/README.md)
- [x] [Fine-Tunes](./finetunes/README.md) (Additional Testing Needed)
- [x] ~~[Fine-Tunes](./finetunes/README.md)~~ (Deprecated)
- [x] [Images](./images/README.md)
- [x] [Models](./models/README.md)
- [x] [Moderations](./moderations/README.md)
Expand Down
48 changes: 47 additions & 1 deletion audio/audio.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@ const (
BaseEndpoint = common.BaseURL + "audio/"
TransciptionEndpoint = BaseEndpoint + "transcriptions"
TranslationEndpoint = BaseEndpoint + "translations"
SpeechEndpoint = BaseEndpoint + "speech"
)

type ResponseFormat = string

const (
// TODO: Support non-json return formats.
JSONResponseFormat = "json"
ResponseFormatJSON = "json"
// Deprecated: Use ResponseFormatJSON instead.
JSONResponseFormat = ResponseFormatJSON
//TextResponseFormat = "text"
//SRTResponseFormat = "srt"
//VerboseJSONResponseFormat = "verbose_json"
Expand Down Expand Up @@ -152,3 +155,46 @@ func MakeTranslationRequest(request *TranslationRequest, organizationID *string)
}
return r, nil
}

// Voices and output formats accepted by the speech (text-to-speech) endpoint.
const (
	// Available text-to-speech voices.
	VoiceAlloy   = "alloy"
	VoiceEcho    = "echo"
	VoiceFable   = "fable"
	VoiceOnyx    = "onyx"
	VoiceNova    = "nova"
	VoiceShimmer = "shimmer"

	// Audio formats the speech endpoint can return.
	SpeechFormatMp3  = "mp3"
	SpeechFormatOpus = "opus"
	SpeechFormatAac  = "aac"
	SpeechFormatFlac = "flac"
)

// SpeechRequest is the request structure for the create speech
// (text-to-speech) endpoint.
type SpeechRequest struct {
	// One of the available TTS models.
	Model string `json:"model"`

	// The text to generate audio for. The maximum length is 4096 characters.
	Input string `json:"input"`

	// The voice to use when generating the audio.
	Voice string `json:"voice"`

	// The format to return the generated audio in.
	ResponseFormat ResponseFormat `json:"response_format,omitempty"`

	// The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.
	Speed float64 `json:"speed,omitempty"`
}

// MakeSpeechRequest calls the create speech endpoint and returns the raw
// generated audio bytes on success.
func MakeSpeechRequest(request *SpeechRequest, organizationID *string) ([]byte, error) {
	resp, err := common.MakeRequest[SpeechRequest, []byte](request, SpeechEndpoint, http.MethodPost, organizationID)
	switch {
	case err != nil:
		return nil, err
	case resp == nil:
		// Defensive check: the helper should never hand back a nil pointer
		// without an error, but fail loudly if it does.
		return nil, errors.New("nil response received")
	default:
		return *resp, nil
	}
}
40 changes: 38 additions & 2 deletions audio/audio_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package audio_test

import (
"errors"
"os"
"testing"

"github.com/TannerKvarfordt/gopenai/audio"
"github.com/TannerKvarfordt/gopenai/authentication"
"github.com/TannerKvarfordt/gopenai/common"
)

const (
Expand All @@ -22,8 +24,9 @@ func init() {

func TestTranscription(t *testing.T) {
resp, err := audio.MakeTranscriptionRequest(&audio.TranscriptionRequest{
File: transcriptionFilePath,
Model: model,
File: transcriptionFilePath,
Model: model,
Language: "en",
}, nil)
if err != nil {
t.Fatal(err)
Expand All @@ -49,3 +52,36 @@ func TestTranslation(t *testing.T) {
return
}
}

// TestSpeech exercises the happy path of the speech (text-to-speech)
// endpoint and verifies that non-empty audio data is returned.
func TestSpeech(t *testing.T) {
	resp, err := audio.MakeSpeechRequest(&audio.SpeechRequest{
		Model:          "tts-1",
		Input:          "The quick brown fox jumps over the lazy dog.",
		Voice:          audio.VoiceAlloy,
		ResponseFormat: audio.SpeechFormatMp3,
	}, nil)
	if err != nil {
		// t.Fatal ends the test immediately; a trailing return is dead code.
		t.Fatal(err)
	}
	if len(resp) == 0 {
		t.Fatal("No audio returned")
	}
}

// TestInvalidSpeechRequest verifies that an invalid request (empty Model
// and no Voice) surfaces a *common.ResponseError from the API.
func TestInvalidSpeechRequest(t *testing.T) {
	_, err := audio.MakeSpeechRequest(&audio.SpeechRequest{
		Model:          "",
		Input:          "The quick brown fox jumps over the lazy dog.",
		ResponseFormat: audio.SpeechFormatMp3,
	}, nil)
	if err == nil {
		// t.Fatal ends the test immediately; a trailing return is dead code.
		t.Fatal("Expected to receive an invalid request error")
	}
	// errors.As only needs a pointer to a nil target of the error's type;
	// pre-allocating with new() was unnecessary.
	var respErr *common.ResponseError
	if !errors.As(err, &respErr) {
		t.Fatal("Expected error to be of type common.ResponseError")
	}
}
132 changes: 92 additions & 40 deletions chat/chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,99 @@ const (
AssistantRole Role = "assistant"
)

// FunctionCall describes a function invocation requested by the model.
type FunctionCall struct {
	// The raw arguments string for the call.
	Arguments string `json:"arguments"`
	// The name of the function to call.
	Name string `json:"name"`
}

// ToolCall is a single tool invocation requested by the model.
type ToolCall struct {
	// Unique identifier for this tool call.
	ID string `json:"id"`
	// The type of the tool being called.
	Type string `json:"type"`
	// Details of the function invocation.
	Function FunctionCall `json:"function"`
}

type Chat struct {
Role Role `json:"role"`
Content string `json:"content"`
Content string `json:"content"`
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
Role Role `json:"role"`

// Deprecated: Use ToolCalls instead
FunctionCall []FunctionCall `json:"function_call,omitempty"`
}

// ResponseFormat specifies the format that the model must output.
type ResponseFormat struct {
	// Must be one of text or json_object.
	Type string `json:"type,omitempty"`
}

// Request structure for the chat API endpoint.
type Request struct {
// The messages to generate chat completions for,
// in the [chat format].
//
// [chat format]: https://platform.openai.com/docs/guides/chat
Messages []Chat `json:"messages"`

// ID of the model to use. You can use the List models API
// to see all of your available models, or see our Model
// overview for descriptions of them.
Model string `json:"model"`

// The messages to generate chat completions for,
// in the [chat format].
// Number between -2.0 and 2.0. Positive values penalize new
// tokens based on their existing frequency in the text so far,
// decreasing the model's likelihood to repeat the same line verbatim.
FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`

// Modify the likelihood of specified tokens appearing in the completion.
// Accepts a json object that maps tokens (specified by their token ID in
// the tokenizer) to an associated bias value from -100 to 100. Mathematically,
// the bias is added to the logits generated by the model prior to sampling.
// The exact effect will vary per model, but values between -1 and 1 should decrease
// or increase likelihood of selection; values like -100 or 100 should result in a
// ban or exclusive selection of the relevant token.
LogitBias map[string]int64 `json:"logit_bias,omitempty"`

// The maximum number of tokens to generate in the chat completion.
// The total length of input tokens and generated tokens is limited
// by the model's context length.
MaxTokens *int64 `json:"max_tokens,omitempty"`

// How many chat completion choices to generate for each input message.
N *int64 `json:"n,omitempty"`

// Number between -2.0 and 2.0. Positive values penalize new tokens
// based on their existing frequency in the text so far, decreasing
// the model's likelihood to repeat the same line verbatim.
PresencePenalty *float64 `json:"presence_penalty,omitempty"`

// An object specifying the format that the model must output.
// Setting to "json_object" enables JSON mode, which guarantees
// the message the model generates is valid JSON.
//
// [chat format]: https://platform.openai.com/docs/guides/chat
Messages []Chat `json:"messages"`
// When using JSON mode, you must also instruct the model to produce
// JSON yourself via a system or user message. Without this, the model
// may generate an unending stream of whitespace until the generation
// reaches the token limit, resulting in a long-running and seemingly
// "stuck" request. Also note that the message content may be partially
// cut off if finish_reason="length", which indicates the generation
// exceeded max_tokens or the conversation exceeded the max context length.
ResponseFormat *ResponseFormat `json:"response_format,omitempty"`

// This feature is in Beta. If specified, our system will make a best effort
// to sample deterministically, such that repeated requests with the same
// seed and parameters should return the same result. Determinism is not
// guaranteed, and you should refer to the system_fingerprint response
// parameter to monitor changes in the backend.
Seed int64 `json:"seed,omitempty"`

// Up to 4 sequences where the API will stop generating further tokens.
Stop []string `json:"stop,omitempty"`

// If set, partial message deltas will be sent, like in ChatGPT. Tokens
// will be sent as data-only server-sent events as they become available,
// with the stream terminated by a data: [DONE] message. See the OpenAI
// Cookbook for example code.
// Stream bool `json:"stream,omitempty"` TODO: Add streaming support

// What sampling temperature to use, between 0 and 2. Higher values
// like 0.8 will make the output more random, while lower values like
Expand All @@ -55,52 +131,28 @@ type Request struct {
// We generally recommend altering this or temperature but not both.
TopP *float64 `json:"top_p,omitempty"`

// How many chat completion choices to generate for each input message.
N *int64 `json:"n,omitempty"`

// If set, partial message deltas will be sent, like in ChatGPT. Tokens
// will be sent as data-only server-sent events as they become available,
// with the stream terminated by a data: [DONE] message. See the OpenAI
// Cookbook for example code.
// Stream bool `json:"stream,omitempty"` TODO: Add streaming support

// Up to 4 sequences where the API will stop generating further tokens.
Stop []string `json:"stop,omitempty"`

// The maximum number of tokens to generate in the chat completion.
// The total length of input tokens and generated tokens is limited
// by the model's context length.
MaxTokens *int64 `json:"max_tokens,omitempty"`

// Number between -2.0 and 2.0. Positive values penalize new tokens
// based on their existing frequency in the text so far, decreasing
// the model's likelihood to repeat the same line verbatim.
PresencePenalty *float64 `json:"presence_penalty,omitempty"`

// Modify the likelihood of specified tokens appearing in the completion.
// Accepts a json object that maps tokens (specified by their token ID in
// the tokenizer) to an associated bias value from -100 to 100. Mathematically,
// the bias is added to the logits generated by the model prior to sampling.
// The exact effect will vary per model, but values between -1 and 1 should decrease
// or increase likelihood of selection; values like -100 or 100 should result in a
// ban or exclusive selection of the relevant token.
LogitBias map[string]int64 `json:"logit_bias,omitempty"`
// TODO: Support tools
// Tools []Tool `json:"tools,omitempty"`
// TODO: Support ToolChoice
// ToolChoice ToolChoice `json:"tool_choice,omitempty"`

// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
User string `json:"user,omitempty"`
}

// Response is the response structure for the chat completions endpoint.
//
// NOTE(review): the original span contained duplicate Object, Created,
// Usage, and Error fields (diff-rendering artifact), which is invalid Go;
// this keeps the post-commit field set and ordering.
type Response struct {
	ID string `json:"id,omitempty"`
	// The completion choices generated by the model.
	// NOTE(review): no json tag is visible on this field in the source;
	// unmarshalling still matches "choices" case-insensitively, but
	// confirm whether an explicit `json:"choices"` tag belongs here.
	Choices []struct {
		Index        int64  `json:"index,omitempty"`
		Message      Chat   `json:"message,omitempty"`
		FinishReason string `json:"finish_reason,omitempty"`
	}
	Created           int64                 `json:"created,omitempty"`
	Model             string                `json:"model,omitempty"`
	SystemFingerprint string                `json:"system_fingerprint,omitempty"`
	Object            string                `json:"object,omitempty"`
	Usage             common.ResponseUsage  `json:"usage"`
	Error             *common.ResponseError `json:"error,omitempty"`
}

func MakeRequest(request *Request, organizationID *string) (*Response, error) {
Expand Down
27 changes: 24 additions & 3 deletions common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"net/http"
"net/url"
"os"
"reflect"
"strings"

auth "github.com/TannerKvarfordt/gopenai/authentication"
Expand All @@ -24,6 +25,10 @@ const (
BaseURL = "https://api.openai.com/" + APIVersion + "/"
)

// responseErrorWrapper mirrors the top-level {"error": {...}} envelope of
// OpenAI API error responses, used to detect an API error embedded in an
// otherwise-binary response body.
type responseErrorWrapper struct {
	Error *ResponseError `json:"error,omitempty"`
}

// A common error structure included in OpenAI API response bodies.
type ResponseError struct {
// The error message.
Expand Down Expand Up @@ -117,13 +122,29 @@ func makeRequest[ResponseT any](req *http.Request) (*ResponseT, error) {
return nil, errors.New("unable to parse response body")
}

response := new(ResponseT)
err = json.Unmarshal(respBody, response)
var response ResponseT
if _, ok := any(response).([]byte); ok {
// Special case for handling binary return types.
// Defer to the caller to do what they will with
// the response.
v := reflect.ValueOf(&response).Elem()
v.Set(reflect.MakeSlice(v.Type(), len(respBody), cap(respBody)))
v.SetBytes(respBody)

respErr := responseErrorWrapper{}
json.Unmarshal(respBody, &respErr)
if respErr.Error != nil {
return &response, respErr.Error
}
return &response, nil
}

err = json.Unmarshal(respBody, &response)
if err != nil {
return nil, err
}

return response, nil
return &response, nil
}

func IsUrl(str string) bool {
Expand Down
11 changes: 9 additions & 2 deletions completions/completions.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ type Request struct {

// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
User string `json:"user,omitempty"`

// If specified, our system will make a best effort to sample deterministically, such that repeated
// requests with the same seed and parameters should return the same result. Determinism is not
// guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes
// in the backend.
Seed *int64 `json:"seed,omitempty"`
}

// Response structure for the completions API endpoint.
Expand All @@ -126,8 +132,9 @@ type Response struct {
TextOffset []uint64 `json:"text_offset"`
} `json:"logprobs"`
} `json:"choices"`
Usage common.ResponseUsage `json:"usage"`
Error *common.ResponseError `json:"error,omitempty"`
SystemFingerprint string `json:"system_fingerprint"`
Usage common.ResponseUsage `json:"usage"`
Error *common.ResponseError `json:"error,omitempty"`
}

// Make a completions request.
Expand Down
Loading

0 comments on commit 3d37da6

Please sign in to comment.