Skip to content

Commit 96963b4

Browse files
.Net: Support BinaryContent in OpenAI Connector (#11644)
### Motivation and Context

The OpenAI Connector only supports `TextContent`, `ImageContent`, and `AudioContent` KernelContent types. However, the Chat Completion API supports uploading other file types encoded as a base64 string. This change leverages that behavior to add support for `BinaryContent`.

### Description

This change adds support for `BinaryContent` KernelContent types by using `ChatMessageContentPart.CreateFilePart` to create the corresponding `ChatMessageContentPart`.

### Contribution Checklist

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄

---------

Co-authored-by: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com>
1 parent 5f54cfb commit 96963b4

File tree

7 files changed

+281
-82
lines changed

7 files changed

+281
-82
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using Microsoft.SemanticKernel;
4+
using Microsoft.SemanticKernel.ChatCompletion;
5+
using Resources;
6+
7+
namespace ChatCompletion;
8+
9+
/// <summary>
/// Demonstrates how to attach a binary file (a PDF) to a user message when calling
/// OpenAI's chat completion through <see cref="IChatCompletionService"/>.
/// </summary>
public class OpenAI_ChatCompletionWithFile(ITestOutputHelper output) : BaseTest(output)
{
    /// <summary>
    /// Attaches the raw bytes of the <c>employees.pdf</c> resource, together with its MIME type,
    /// to the chat message and prints the model's reply.
    /// </summary>
    [Fact]
    public async Task UsingLocalFileInChatCompletion()
    {
        var pdfBytes = await EmbeddedResource.ReadAllAsync("employees.pdf");

        var chatService = Kernel.CreateBuilder()
            .AddOpenAIChatCompletion(TestConfiguration.OpenAI.ChatModelId, TestConfiguration.OpenAI.ApiKey)
            .Build()
            .GetRequiredService<IChatCompletionService>();

        // The file travels in the same user message as the question about it.
        var question = new TextContent("What's in this file?");
        var attachment = new BinaryContent(pdfBytes, "application/pdf");

        var history = new ChatHistory("You are a friendly assistant.");
        history.AddUserMessage([question, attachment]);

        var answer = await chatService.GetChatMessageContentAsync(history);

        Console.WriteLine(answer.Content);
    }

    /// <summary>
    /// Attaches the same <c>employees.pdf</c> resource, this time expressed as a Base64 data URI,
    /// to the chat message and prints the model's reply.
    /// </summary>
    [Fact]
    public async Task UsingBase64DataUriInChatCompletion()
    {
        var pdfBytes = await EmbeddedResource.ReadAllAsync("employees.pdf");

        // Build the "data:<mime>;base64,<payload>" form accepted by BinaryContent's string constructor.
        var pdfDataUri = $"data:application/pdf;base64,{Convert.ToBase64String(pdfBytes.ToArray())}";

        var chatService = Kernel.CreateBuilder()
            .AddOpenAIChatCompletion(TestConfiguration.OpenAI.ChatModelId, TestConfiguration.OpenAI.ApiKey)
            .Build()
            .GetRequiredService<IChatCompletionService>();

        var question = new TextContent("What's in this file?");
        var attachment = new BinaryContent(pdfDataUri);

        var history = new ChatHistory("You are a friendly assistant.");
        history.AddUserMessage([question, attachment]);

        var answer = await chatService.GetChatMessageContentAsync(history);

        Console.WriteLine(answer.Content);
    }
}

dotnet/samples/Concepts/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ dotnet test -l "console;verbosity=detailed" --filter "FullyQualifiedName=ChatCom
8181
- [OpenAI_ChatCompletionStreaming](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionStreaming.cs)
8282
- [OpenAI_ChatCompletionWebSearch](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWebSearch.cs)
8383
- [OpenAI_ChatCompletionWithAudio](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithAudio.cs)
84+
- [OpenAI_ChatCompletionWithFile](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithFile.cs)
8485
- [OpenAI_ChatCompletionWithReasoning](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithReasoning.cs)
8586
- [OpenAI_ChatCompletionWithVision](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithVision.cs)
8687
- [OpenAI_CustomClient](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_CustomClient.cs)
42.4 KB
Binary file not shown.

dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIChatCompletionServiceTests.cs

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2144,4 +2144,85 @@ public async Task ItHandlesAudioContentWithMetadataInResponseAsync()
21442144
Assert.NotNull(audioContent.Metadata["ExpiresAt"]);
21452145
// The ExpiresAt value is converted to a DateTime object, so we can't directly compare it to the Unix timestamp
21462146
}
2147+
2148+
/// <summary>
/// Verifies that a user message holding a <see cref="Microsoft.SemanticKernel.BinaryContent"/>
/// created without any data makes the chat completion call throw <see cref="ArgumentException"/>.
/// </summary>
[Fact]
public async Task GetChatMessageContentsThrowsExceptionWithEmptyBinaryContentAsync()
{
    // Arrange
    var service = new OpenAIChatCompletionService(modelId: "gpt-4o-mini", apiKey: "NOKEY", httpClient: this._httpClient);
    var emptyContent = new Microsoft.SemanticKernel.BinaryContent();

    var history = new ChatHistory();
    history.AddUserMessage([emptyContent]);

    // Act
    Task SendAsync() => service.GetChatMessageContentsAsync(history);

    // Assert
    await Assert.ThrowsAsync<ArgumentException>(SendAsync);
}
2160+
2161+
/// <summary>
/// Verifies that a user message holding a <see cref="Microsoft.SemanticKernel.BinaryContent"/>
/// constructed only from a <see cref="Uri"/> makes the chat completion call throw
/// <see cref="ArgumentException"/>.
/// </summary>
[Fact]
public async Task GetChatMessageContentsThrowsExceptionUriOnlyReferenceBinaryContentAsync()
{
    // Arrange
    var service = new OpenAIChatCompletionService(modelId: "gpt-4o-mini", apiKey: "NOKEY", httpClient: this._httpClient);
    var uriOnlyContent = new Microsoft.SemanticKernel.BinaryContent(new Uri("file://testfile.pdf"));

    var history = new ChatHistory();
    history.AddUserMessage([uriOnlyContent]);

    // Act
    Task SendAsync() => service.GetChatMessageContentsAsync(history);

    // Assert
    await Assert.ThrowsAsync<ArgumentException>(SendAsync);
}
2173+
2174+
/// <summary>
/// Verifies that binary content is serialized into the request as a <c>file</c> content part whose
/// <c>file_data</c> is a Base64 data URI, whether the content was created from a data URI or from raw bytes.
/// </summary>
/// <param name="useUriData">
/// <see langword="true"/> to build the content from a data URI string;
/// <see langword="false"/> to build it from decoded bytes plus a MIME type.
/// </param>
[Theory]
[InlineData(true)]
[InlineData(false)]
public async Task ItSendsBinaryContentCorrectlyAsync(bool useUriData)
{
    // Arrange
    var chatCompletion = new OpenAIChatCompletionService(modelId: "gpt-4o-mini", apiKey: "NOKEY", httpClient: this._httpClient);
    this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(System.Net.HttpStatusCode.OK)
    {
        Content = new StringContent(ChatCompletionResponse)
    };

    var mimeType = "application/pdf";

    // Both construction paths are expected to produce this exact value in the request payload.
    var expectedDataUri = $"data:{mimeType};base64,{PdfBase64Data}";

    var chatHistory = new ChatHistory();
    chatHistory.AddUserMessage([
        new TextContent("What's in this file?"),
        useUriData
            ? new Microsoft.SemanticKernel.BinaryContent(expectedDataUri)
            : new Microsoft.SemanticKernel.BinaryContent(Convert.FromBase64String(PdfBase64Data), mimeType)
    ]);

    // Act
    await chatCompletion.GetChatMessageContentsAsync(chatHistory);

    // Assert
    // Check the captured bytes before decoding them: asserting on the decoded string can never fail,
    // and a missing request body should show up as a test failure rather than a decoder exception.
    var requestBytes = this._messageHandlerStub.RequestContent;
    Assert.NotNull(requestBytes);

    var actualRequestContent = Encoding.UTF8.GetString(requestBytes!);
    var optionsJson = JsonSerializer.Deserialize<JsonElement>(actualRequestContent);

    var messages = optionsJson.GetProperty("messages");
    Assert.Equal(1, messages.GetArrayLength());

    var contentItems = messages[0].GetProperty("content");
    Assert.Equal(2, contentItems.GetArrayLength());

    Assert.Equal("text", contentItems[0].GetProperty("type").GetString());
    Assert.Equal("What's in this file?", contentItems[0].GetProperty("text").GetString());

    Assert.Equal("file", contentItems[1].GetProperty("type").GetString());

    // Check for the file data
    Assert.True(contentItems[1].TryGetProperty("file", out var fileData));
    Assert.Equal(JsonValueKind.Object, fileData.ValueKind);
    Assert.True(fileData.TryGetProperty("file_data", out var dataProperty));

    // Assert.Equal against a non-null expected value also covers the null case.
    Assert.Equal(expectedDataUri, dataProperty.GetString());
}
2223+
2224+
/// <summary>
2225+
/// Sample PDF document, Base64-encoded (raw payload only, without the <c>data:</c> URI prefix), for testing.
2226+
/// </summary>
2227+
private const string PdfBase64Data = "JVBERi0xLjQKMSAwIG9iago8PC9UeXBlIC9DYXRhbG9nCi9QYWdlcyAyIDAgUgo+PgplbmRvYmoKMiAwIG9iago8PC9UeXBlIC9QYWdlcwovS2lkcyBbMyAwIFJdCi9Db3VudCAxCj4+CmVuZG9iagozIDAgb2JqCjw8L1R5cGUgL1BhZ2UKL1BhcmVudCAyIDAgUgovTWVkaWFCb3ggWzAgMCA1OTUgODQyXQovQ29udGVudHMgNSAwIFIKL1Jlc291cmNlcyA8PC9Qcm9jU2V0IFsvUERGIC9UZXh0XQovRm9udCA8PC9GMSA0IDAgUj4+Cj4+Cj4+CmVuZG9iago0IDAgb2JqCjw8L1R5cGUgL0ZvbnQKL1N1YnR5cGUgL1R5cGUxCi9OYW1lIC9GMQovQmFzZUZvbnQgL0hlbHZldGljYQovRW5jb2RpbmcgL01hY1JvbWFuRW5jb2RpbmcKPj4KZW5kb2JqCjUgMCBvYmoKPDwvTGVuZ3RoIDUzCj4+CnN0cmVhbQpCVAovRjEgMjAgVGYKMjIwIDQwMCBUZAooRHVtbXkgUERGKSBUagpFVAplbmRzdHJlYW0KZW5kb2JqCnhyZWYKMCA2CjAwMDAwMDAwMDAgNjU1MzUgZgowMDAwMDAwMDA5IDAwMDAwIG4KMDAwMDAwMDA2MyAwMDAwMCBuCjAwMDAwMDAxMjQgMDAwMDAgbgowMDAwMDAwMjc3IDAwMDAwIG4KMDAwMDAwMDM5MiAwMDAwMCBuCnRyYWlsZXIKPDwvU2l6ZSA2Ci9Sb290IDEgMCBSCj4+CnN0YXJ0eHJlZgo0OTUKJSVFT0YK";
21472228
}

dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.ChatCompletion.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,7 @@ private static List<ChatMessage> CreateRequestMessages(ChatMessageContent messag
777777
TextContent textContent => ChatMessageContentPart.CreateTextPart(textContent.Text),
778778
ImageContent imageContent => GetImageContentItem(imageContent),
779779
AudioContent audioContent => GetAudioContentItem(audioContent),
780+
BinaryContent binaryContent => GetBinaryContentItem(binaryContent),
780781
_ => throw new NotSupportedException($"Unsupported chat message content type '{item.GetType()}'.")
781782
}))
782783
{ ParticipantName = message.AuthorName }
@@ -887,6 +888,16 @@ private static ChatMessageContentPart GetAudioContentItem(AudioContent audioCont
887888
throw new ArgumentException($"{nameof(AudioContent)} must have Data bytes.");
888889
}
889890

891+
/// <summary>
/// Converts a <see cref="BinaryContent"/> item into a file content part for the chat request.
/// </summary>
/// <param name="binaryContent">The binary content to convert; it must carry non-empty data and a MIME type.</param>
/// <returns>
/// The <see cref="ChatMessageContentPart"/> produced by <c>ChatMessageContentPart.CreateFilePart</c>
/// from the content's bytes and MIME type.
/// </returns>
/// <exception cref="ArgumentException">Thrown when the content has no data bytes or no MIME type.</exception>
private static ChatMessageContentPart GetBinaryContentItem(BinaryContent binaryContent)
{
    if (binaryContent.Data is { IsEmpty: false } data)
    {
        // Validate the MIME type here, as the audio path does, so a missing value is reported
        // with a message that names the offending content type.
        if (string.IsNullOrWhiteSpace(binaryContent.MimeType))
        {
            throw new ArgumentException($"{nameof(BinaryContent)} must have a MimeType.");
        }

        // NOTE(review): the third argument is presumably the file name; a fresh GUID is passed for it
        // on every call - confirm against the CreateFilePart documentation.
        return ChatMessageContentPart.CreateFilePart(BinaryData.FromBytes(data), binaryContent.MimeType, Guid.NewGuid().ToString());
    }

    throw new ArgumentException($"{nameof(BinaryContent)} must have Data bytes.");
}
900+
890901
private static ChatInputAudioFormat GetChatInputAudioFormat(string? mimeType)
891902
{
892903
if (string.IsNullOrWhiteSpace(mimeType))

0 commit comments

Comments
 (0)