Implement ai-text-completions - OpenAI #477

Merged 4 commits on Sep 25, 2023
4 changes: 2 additions & 2 deletions examples/applications/openai-completions/README.md
@@ -1,6 +1,6 @@
-# OpenAI Completions
+# OpenAI Chat Completions

-This sample application shows how to execute completion using the OpenAI library using the Azure OpenAI API endpoint.
+This sample application shows how to execute Chat Completions with the OpenAI library against the Azure OpenAI API endpoint.

## Configure your OpenAI API Key

32 changes: 32 additions & 0 deletions examples/applications/openai-text-completions/README.md
@@ -0,0 +1,32 @@
# OpenAI Instruct Completions

This sample application shows how to use the OpenAI `gpt-3.5-turbo-instruct` model.

## Configure your OpenAI API key


```
export OPENAI_ACCESS_KEY=...
```

## Deploy the LangStream application
```
langstream docker run test -app https://github.com/LangStream/langstream/examples/applications/openai-text-completions -s https://raw.githubusercontent.com/LangStream/langstream/main/examples/secrets/secrets.yaml
```

## Chat with the model

```
./bin/langstream gateway chat test -g chat
```

This model is optimized for instruction-following tasks. For example, you can ask it to translate a sentence into another language.

```
You:
> Translate "How are you?" in Italian
```
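
The exact wording varies from run to run, but a typical answer streamed back on the `answers` topic looks like:

```
Come stai?
```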

23 changes: 23 additions & 0 deletions examples/applications/openai-text-completions/configuration.yaml
@@ -0,0 +1,23 @@
#
#
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

configuration:
  resources:
    - type: "open-ai-configuration"
      name: "OpenAI configuration"
      configuration:
        access-key: "{{ secrets.open-ai.access-key }}"
26 changes: 26 additions & 0 deletions examples/applications/openai-text-completions/gateways.yaml
@@ -0,0 +1,26 @@
#
#
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

gateways:
  - id: chat
    type: chat
    chat-options:
      answers-topic: answers
      questions-topic: questions
      headers:
        - key: langstream-client-session-id
          value-from-parameters: sessionId
51 changes: 51 additions & 0 deletions examples/applications/openai-text-completions/pipeline.yaml
@@ -0,0 +1,51 @@
#
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

topics:
  - name: "questions"
    creation-mode: create-if-not-exists
  - name: "answers"
    creation-mode: create-if-not-exists
  - name: "debug"
    creation-mode: create-if-not-exists
pipeline:
  - name: "convert-to-json"
    type: "document-to-json"
    input: "questions"
    configuration:
      text-field: "question"
  - name: "ai-text-completions"
    type: "ai-text-completions"
    output: "debug"
    configuration:
      model: "{{{secrets.open-ai.text-completions-model}}}"
      # on the output (debug) topic we add a field with the answer
      completion-field: "value.answer"
      # we also log the prompt that was sent to the LLM
      log-field: "value.prompt"
      # streaming behavior: each chunk produced by the LLM is sent to the answers topic
      stream-to-topic: "answers"
      # on the streaming topic the answer is sent as the whole message
      # the 'value' syntax refers to the whole value of the message
      stream-response-completion-field: "value"
      # chunks are batched before being written to the topic: the agent starts
      # with 1 chunk per message, then 2, 4, ... up to min-chunks-per-message.
      # Small early batches keep first-answer latency low; bigger later batches
      # reduce per-message overhead on the topic (see the sketch after this file)
      min-chunks-per-message: 10
      prompt:
        - "{{{% value.question}}}"
1 change: 1 addition & 0 deletions examples/secrets/secrets.yaml
@@ -29,6 +29,7 @@ secrets:
      provider: "${OPEN_AI_PROVIDER:-openai}"
      embeddings-model: "${OPEN_AI_EMBEDDINGS_MODEL:-text-embedding-ada-002}"
      chat-completions-model: "${OPEN_AI_CHAT_COMPLETIONS_MODEL:-gpt-3.5-turbo}"
+     text-completions-model: "${OPEN_AI_TEXT_COMPLETIONS_MODEL:-gpt-3.5-turbo-instruct}"
  - id: vertex-ai
    data:
      url: "${VERTEX_AI_URL:-https://us-central1-aiplatform.googleapis.com}"
@@ -33,6 +33,7 @@ public class GenAIAgentCodeProvider implements AgentCodeProvider {
"compute-ai-embeddings",
"query",
"ai-chat-completions",
"ai-text-completions",
"ai-tools" // legacy
);

@@ -26,7 +26,6 @@
import ai.langstream.api.runner.code.SimpleRecord;
import ai.langstream.api.runner.topics.TopicProducer;
import ai.langstream.api.runtime.ComponentType;
-import com.datastax.oss.streaming.ai.ChatCompletionsStep;
import com.datastax.oss.streaming.ai.TransformContext;
import com.datastax.oss.streaming.ai.TransformStep;
import com.datastax.oss.streaming.ai.datasource.QueryStepDataSource;
@@ -35,6 +34,8 @@
import com.datastax.oss.streaming.ai.model.config.StepConfig;
import com.datastax.oss.streaming.ai.model.config.TransformStepConfig;
import com.datastax.oss.streaming.ai.services.ServiceProvider;
+import com.datastax.oss.streaming.ai.streaming.StreamingAnswersConsumer;
+import com.datastax.oss.streaming.ai.streaming.StreamingAnswersConsumerFactory;
import com.datastax.oss.streaming.ai.util.TransformFunctionUtil;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -337,13 +338,13 @@ private static TransformSchemaType getSchemaType(Class<?> javaType) {
    }

    private static class TopicProducerStreamingAnswersConsumerFactory
-           implements ChatCompletionsStep.StreamingAnswersConsumerFactory {
+           implements StreamingAnswersConsumerFactory {
        private AgentContext agentContext;

        public TopicProducerStreamingAnswersConsumerFactory() {}

        @Override
-       public ChatCompletionsStep.StreamingAnswersConsumer create(String topicName) {
+       public StreamingAnswersConsumer create(String topicName) {
            TopicProducer topicProducer =
                    agentContext
                            .getTopicConnectionProvider()
@@ -358,8 +359,7 @@ public void setAgentContext(AgentContext agentContext) {
        }
    }

-   private static class TopicStreamingAnswersConsumer
-           implements ChatCompletionsStep.StreamingAnswersConsumer {
+   private static class TopicStreamingAnswersConsumer implements StreamingAnswersConsumer {
        private TopicProducer topicProducer;

        public TopicStreamingAnswersConsumer(TopicProducer topicProducer) {
@@ -161,6 +161,15 @@ public HuggingFaceCompletionsService(
        this.httpClient = HttpClient.newHttpClient();
    }

    @Override
    public CompletableFuture<String> getTextCompletions(
            List<String> prompt,
            StreamingChunksConsumer streamingChunksConsumer,
            Map<String, Object> options) {
        return CompletableFuture.failedFuture(
                new UnsupportedOperationException("Not implemented"));
    }

    @Override
    @SneakyThrows
    public CompletableFuture<ChatCompletions> getChatCompletions(
@@ -23,14 +23,17 @@
import com.azure.ai.openai.models.ChatCompletionsOptions;
import com.azure.ai.openai.models.ChatRole;
import com.azure.ai.openai.models.CompletionsFinishReason;
+import com.azure.ai.openai.models.CompletionsOptions;
import com.datastax.oss.streaming.ai.completions.ChatChoice;
import com.datastax.oss.streaming.ai.completions.ChatCompletions;
import com.datastax.oss.streaming.ai.completions.ChatMessage;
+import com.datastax.oss.streaming.ai.completions.Chunk;
import com.datastax.oss.streaming.ai.completions.CompletionsService;
import java.io.StringWriter;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
@@ -207,4 +210,129 @@ public ChatMessage buildTotalAnswerMessage() {
            return new ChatMessage(role.get(), totalAnswer.toString());
        }
    }

    @Override
    public CompletableFuture<String> getTextCompletions(
            List<String> prompt,
            StreamingChunksConsumer streamingChunksConsumer,
            Map<String, Object> options) {
        int minChunksPerMessage = getInteger("min-chunks-per-message", 20, options);
        CompletionsOptions completionsOptions =
                new CompletionsOptions(prompt)
                        .setMaxTokens(getInteger("max-tokens", null, options))
                        .setTemperature(getDouble("temperature", null, options))
                        .setTopP(getDouble("top-p", null, options))
                        .setLogitBias((Map<String, Integer>) options.get("logit-bias"))
                        .setStream(getBoolean("stream", true, options))
                        .setUser((String) options.get("user"))
                        .setStop((List<String>) options.get("stop"))
                        .setPresencePenalty(getDouble("presence-penalty", null, options))
                        .setFrequencyPenalty(getDouble("frequency-penalty", null, options));

        // this is the default behavior, as it is async
        // it works even if the streamingChunksConsumer is null
        if (completionsOptions.isStream()) {
            CompletableFuture<?> finished = new CompletableFuture<>();
            Flux<com.azure.ai.openai.models.Completions> flux =
                    client.getCompletionsStream((String) options.get("model"), completionsOptions);

            TextCompletionsConsumer textCompletionsConsumer =
                    new TextCompletionsConsumer(
                            streamingChunksConsumer, minChunksPerMessage, finished);

            flux.doOnError(
                            error -> {
                                log.error(
                                        "Internal error while processing the streaming response",
                                        error);
                                finished.completeExceptionally(error);
                            })
                    .doOnNext(textCompletionsConsumer)
                    .subscribe();

            return finished.thenApply(___ -> textCompletionsConsumer.totalAnswer.toString());
        } else {
            com.azure.ai.openai.models.Completions completions =
                    client.getCompletions((String) options.get("model"), completionsOptions)
                            .block();
            final String text = completions.getChoices().get(0).getText();
            return CompletableFuture.completedFuture(text);
        }
    }

    private static class TextCompletionsConsumer
            implements Consumer<com.azure.ai.openai.models.Completions> {
        private final StreamingChunksConsumer streamingChunksConsumer;
        private final CompletableFuture<?> finished;

        private final AtomicReference<String> role = new AtomicReference<>();
        private final StringWriter totalAnswer = new StringWriter();

        private final StringWriter writer = new StringWriter();
        private final AtomicInteger numberOfChunks = new AtomicInteger();
        private final int minChunksPerMessage;

        private AtomicInteger currentChunkSize = new AtomicInteger(1);
        private AtomicInteger index = new AtomicInteger();

        private final AtomicBoolean firstChunk = new AtomicBoolean(true);

        public TextCompletionsConsumer(
                StreamingChunksConsumer streamingChunksConsumer,
                int minChunksPerMessage,
                CompletableFuture<?> finished) {
            this.minChunksPerMessage = minChunksPerMessage;
            this.streamingChunksConsumer =
                    streamingChunksConsumer != null
                            ? streamingChunksConsumer
                            : (answerId, index, chunk, last) -> {};
            this.finished = finished;
        }

        @Override
        @SneakyThrows
        public synchronized void accept(com.azure.ai.openai.models.Completions completions) {
            List<com.azure.ai.openai.models.Choice> choices = completions.getChoices();
            String answerId = completions.getId();
            if (!choices.isEmpty()) {
                com.azure.ai.openai.models.Choice first = choices.get(0);

                CompletionsFinishReason finishReason = first.getFinishReason();
                boolean last = finishReason != null;
                final String content = first.getText();
                if (content == null) {
                    return;
                }
                if (firstChunk.compareAndSet(true, false)) {
                    // Some models return two line breaks at the beginning of the first
                    // response, even though this is not documented
                    // https://community.openai.com/t/output-starts-often-with-linebreaks/36333/4
                    if (content.isBlank()) {
                        return;
                    }
                }
                writer.write(content);
                totalAnswer.write(content);
                numberOfChunks.incrementAndGet();

                // start from 1 chunk, then double the size until we reach minChunksPerMessage;
                // this gives better latency for the first message
                int currentMinChunksPerMessage = currentChunkSize.get();

                if (numberOfChunks.get() >= currentMinChunksPerMessage || last) {
                    currentChunkSize.set(
                            Math.min(currentMinChunksPerMessage * 2, minChunksPerMessage));
                    final String chunkContent = writer.toString();
                    final Chunk chunk = () -> chunkContent;
                    streamingChunksConsumer.consumeChunk(
                            answerId, index.incrementAndGet(), chunk, last);
                    writer.getBuffer().setLength(0);
                    numberOfChunks.set(0);
                }
                if (last) {
                    finished.complete(null);
                }
            }
        }
    }
}
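
For reference, a minimal caller-side sketch of the new API (not part of the PR: the `service` instance is assumed to be an already-configured OpenAI-backed `CompletionsService`, and the option keys are the ones read by `getTextCompletions` above):

```java
import com.datastax.oss.streaming.ai.completions.CompletionsService;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;

public class TextCompletionsExample {
    public static CompletableFuture<String> ask(CompletionsService service) {
        Map<String, Object> options = new HashMap<>();
        options.put("model", "gpt-3.5-turbo-instruct");
        options.put("stream", true);
        options.put("min-chunks-per-message", 10);
        return service.getTextCompletions(
                List.of("Translate \"How are you?\" in Italian"),
                // consumeChunk(answerId, index, chunk, last) fires once per
                // batched chunk; the returned future completes with the full text
                (answerId, index, chunk, last) ->
                        System.out.printf("chunk %d (last=%s)%n", index, last),
                options);
    }
}
```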
@@ -339,6 +339,15 @@ static class Message {
        }
    }

    @Override
    public CompletableFuture<String> getTextCompletions(
            List<String> prompt,
            StreamingChunksConsumer streamingChunksConsumer,
            Map<String, Object> options) {
        return CompletableFuture.failedFuture(
                new UnsupportedOperationException("Not implemented"));
    }

    @Data
    static class Predictions {
