/
DocumentAnalysisClient.java
238 lines (228 loc) · 14.6 KB
/
DocumentAnalysisClient.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.azure.ai.formrecognizer;
import com.azure.ai.formrecognizer.implementation.models.AnalyzeResultOperation;
import com.azure.ai.formrecognizer.implementation.models.OperationStatus;
import com.azure.ai.formrecognizer.models.AnalyzeDocumentOptions;
import com.azure.ai.formrecognizer.models.AnalyzeResult;
import com.azure.ai.formrecognizer.models.DocumentModelOperationException;
import com.azure.ai.formrecognizer.models.DocumentOperationResult;
import com.azure.core.annotation.ReturnType;
import com.azure.core.annotation.ServiceClient;
import com.azure.core.annotation.ServiceMethod;
import com.azure.core.util.Context;
import com.azure.core.util.polling.SyncPoller;
import reactor.core.publisher.Flux;
import java.io.InputStream;
import java.nio.ByteBuffer;
import static com.azure.ai.formrecognizer.implementation.util.Utility.toFluxByteBuffer;
/**
 * Synchronous client exposing the Azure Form Recognizer document analysis operations.
 * The client analyzes information from documents and images using custom-built document analysis models,
 * prebuilt models for invoices, receipts, identity documents and business cards, and the layout model.
 *
 * <p><strong>Instantiating a synchronous Document Analysis Client</strong></p>
 *
 * <!-- src_embed com.azure.ai.formrecognizer.DocumentAnalysisClient.instantiation -->
 * <pre>
 * DocumentAnalysisClient documentAnalysisClient = new DocumentAnalysisClientBuilder()
 *     .credential(new AzureKeyCredential("{key}"))
 *     .endpoint("{endpoint}")
 *     .buildClient();
 * </pre>
 * <!-- end com.azure.ai.formrecognizer.DocumentAnalysisClient.instantiation -->
 *
 * @see DocumentAnalysisClientBuilder
 */
@ServiceClient(builder = DocumentAnalysisClientBuilder.class)
public final class DocumentAnalysisClient {
    // Every public operation is forwarded to this asynchronous client and surfaced as a SyncPoller.
    private final DocumentAnalysisAsyncClient client;

    /**
     * Creates a {@link DocumentAnalysisClient client} that sends requests to the Document Analysis service's
     * endpoint. Each service call goes through the {@link DocumentAnalysisClientBuilder#pipeline http pipeline}.
     *
     * @param client The {@link DocumentAnalysisAsyncClient} that this client routes its requests through.
     */
    DocumentAnalysisClient(DocumentAnalysisAsyncClient client) {
        this.client = client;
    }

    /**
     * Analyzes data from a document located at a URL, using optical character recognition (OCR) and semantic
     * values, with any of the prebuilt models or a custom-built analysis model.
     * <p>The service does not support cancellation of the long running operation and returns with an error message
     * indicating absence of cancellation support.</p>
     *
     * <p><strong>Code sample</strong></p>
     * <p> Analyze a document using the URL of the document. </p>
     * <!-- src_embed com.azure.ai.formrecognizer.DocumentAnalysisClient.beginAnalyzeDocumentFromUrl#string-string -->
     * <pre>
     * String documentUrl = "{document_url}";
     * String modelId = "{custom_trained_model_id}";
     *
     * documentAnalysisClient.beginAnalyzeDocumentFromUrl(modelId, documentUrl).getFinalResult()
     *     .getDocuments().stream()
     *     .map(AnalyzedDocument::getFields)
     *     .forEach(documentFieldMap -> documentFieldMap.forEach((key, documentField) -> {
     *         System.out.printf("Field text: %s%n", key);
     *         System.out.printf("Field value data content: %s%n", documentField.getContent());
     *         System.out.printf("Confidence score: %.2f%n", documentField.getConfidence());
     *     }));
     *
     * </pre>
     * <!-- end com.azure.ai.formrecognizer.DocumentAnalysisClient.beginAnalyzeDocumentFromUrl#string-string -->
     *
     * @param modelId The unique model ID to be used. Use this to specify the custom model ID or prebuilt model ID.
     * Prebuilt model IDs supported can be found <a href="https://aka.ms/azsdk/formrecognizer/models">here</a>
     * @param documentUrl The URL of the document to analyze.
     *
     * @return A {@link SyncPoller} to poll the progress of the analyze document operation until it has completed,
     * has failed, or has been cancelled. The completed operation returns an {@link AnalyzeResult}.
     * @throws DocumentModelOperationException If analyze operation fails and the {@link AnalyzeResultOperation}
     * returns with an {@link OperationStatus#FAILED}.
     * @throws IllegalArgumentException If {@code documentUrl} or {@code modelId} is null.
     */
    @ServiceMethod(returns = ReturnType.LONG_RUNNING_OPERATION)
    public SyncPoller<DocumentOperationResult, AnalyzeResult> beginAnalyzeDocumentFromUrl(String modelId,
        String documentUrl) {
        // This convenience overload supplies no extra options and an empty context.
        final AnalyzeDocumentOptions noOptions = null;
        return this.beginAnalyzeDocumentFromUrl(modelId, documentUrl, noOptions, Context.NONE);
    }

    /**
     * Analyzes data from a document located at a URL, using optical character recognition (OCR) and semantic
     * values, with any of the prebuilt models or a custom-built analysis model. This overload additionally accepts
     * analysis options and a pipeline context.
     * <p>The service does not support cancellation of the long running operation and returns with an
     * error message indicating absence of cancellation support.</p>
     *
     * <p><strong>Code sample</strong></p>
     * <p> Analyze a document using the URL of the document. The sample below uses the two-argument overload. </p>
     * <!-- src_embed com.azure.ai.formrecognizer.DocumentAnalysisClient.beginAnalyzeDocumentFromUrl#string-string -->
     * <pre>
     * String documentUrl = "{document_url}";
     * String modelId = "{custom_trained_model_id}";
     *
     * documentAnalysisClient.beginAnalyzeDocumentFromUrl(modelId, documentUrl).getFinalResult()
     *     .getDocuments().stream()
     *     .map(AnalyzedDocument::getFields)
     *     .forEach(documentFieldMap -> documentFieldMap.forEach((key, documentField) -> {
     *         System.out.printf("Field text: %s%n", key);
     *         System.out.printf("Field value data content: %s%n", documentField.getContent());
     *         System.out.printf("Confidence score: %.2f%n", documentField.getConfidence());
     *     }));
     *
     * </pre>
     * <!-- end com.azure.ai.formrecognizer.DocumentAnalysisClient.beginAnalyzeDocumentFromUrl#string-string -->
     *
     * @param modelId The unique model ID to be used. Use this to specify the custom model ID or prebuilt model ID.
     * Prebuilt model IDs supported can be found <a href="https://aka.ms/azsdk/formrecognizer/models">here</a>
     * @param documentUrl The source URL to the input document.
     * @param analyzeDocumentOptions The additional configurable {@link AnalyzeDocumentOptions options} that may be
     * passed when analyzing documents.
     * @param context Additional context that is passed through the HTTP pipeline during the service call.
     *
     * @return A {@link SyncPoller} to poll the progress of the analyze document operation until it has completed,
     * has failed, or has been cancelled. The completed operation returns an {@link AnalyzeResult}.
     * @throws DocumentModelOperationException If analyze operation fails and the {@link AnalyzeResultOperation}
     * returns with an {@link OperationStatus#FAILED}.
     * @throws IllegalArgumentException If {@code documentUrl} or {@code modelId} is null.
     */
    @ServiceMethod(returns = ReturnType.LONG_RUNNING_OPERATION)
    public SyncPoller<DocumentOperationResult, AnalyzeResult> beginAnalyzeDocumentFromUrl(String modelId,
        String documentUrl, AnalyzeDocumentOptions analyzeDocumentOptions, Context context) {
        // NOTE(review): documentUrl is passed BEFORE modelId here, the reverse of this method's own parameter
        // order. Presumably this matches the async client's context-taking overload; confirm against
        // DocumentAnalysisAsyncClient before reordering.
        return this.client
            .beginAnalyzeDocumentFromUrl(documentUrl, modelId, analyzeDocumentOptions, context)
            .getSyncPoller();
    }

    /**
     * Analyzes data from a document supplied as a stream, using optical character recognition (OCR), with any of
     * the prebuilt models or a custom-built analysis model.
     * <p>The service does not support cancellation of the long running operation and returns with an
     * error message indicating absence of cancellation support.</p>
     *
     * <p><strong>Code sample</strong></p>
     * <!-- src_embed com.azure.ai.formrecognizer.DocumentAnalysisClient.beginAnalyzeDocument#string-InputStream-long -->
     * <pre>
     * File document = new File("{local/file_path/fileName.jpg}");
     * String modelId = "{custom_trained_model_id}";
     * byte[] fileContent = Files.readAllBytes(document.toPath());
     * try (InputStream targetStream = new ByteArrayInputStream(fileContent)) {
     *
     *     documentAnalysisClient.beginAnalyzeDocument(modelId, targetStream, document.length())
     *         .getFinalResult()
     *         .getDocuments().stream()
     *         .map(AnalyzedDocument::getFields)
     *         .forEach(documentFieldMap -> documentFieldMap.forEach((key, documentField) -> {
     *             System.out.printf("Field text: %s%n", key);
     *             System.out.printf("Field value data content: %s%n", documentField.getContent());
     *             System.out.printf("Confidence score: %.2f%n", documentField.getConfidence());
     *         }));
     * }
     * </pre>
     * <!-- end com.azure.ai.formrecognizer.DocumentAnalysisClient.beginAnalyzeDocument#string-InputStream-long -->
     *
     * @param modelId The unique model ID to be used. Use this to specify the custom model ID or prebuilt model ID.
     * Prebuilt model IDs supported can be found <a href="https://aka.ms/azsdk/formrecognizer/models">here</a>
     * @param document The data of the document to analyze information from.
     * @param length The exact length of the data.
     *
     * @return A {@link SyncPoller} to poll the progress of the analyze document operation until it has completed,
     * has failed, or has been cancelled. The completed operation returns an {@link AnalyzeResult}.
     * @throws DocumentModelOperationException If analyze operation fails and the {@link AnalyzeResultOperation}
     * returns with an {@link OperationStatus#FAILED}.
     * @throws IllegalArgumentException If {@code document} or {@code modelId} is null.
     */
    @ServiceMethod(returns = ReturnType.LONG_RUNNING_OPERATION)
    public SyncPoller<DocumentOperationResult, AnalyzeResult> beginAnalyzeDocument(String modelId,
        InputStream document, long length) {
        // This convenience overload supplies no extra options and an empty context.
        final AnalyzeDocumentOptions noOptions = null;
        return this.beginAnalyzeDocument(modelId, document, length, noOptions, Context.NONE);
    }

    /**
     * Analyzes data from a document supplied as a stream, using optical character recognition (OCR) and semantic
     * values, with any of the prebuilt models or a custom-built analysis model. This overload additionally accepts
     * analysis options and a pipeline context.
     * <p>The service does not support cancellation of the long running operation and returns with an
     * error message indicating absence of cancellation support.</p>
     *
     * <p><strong>Code sample</strong></p>
     * <p> Analyze a document with configurable options. </p>
     * <!-- src_embed com.azure.ai.formrecognizer.DocumentAnalysisClient.beginAnalyzeDocument#string-InputStream-long-AnalyzeDocumentOptions-Context -->
     * <pre>
     * File document = new File("{local/file_path/fileName.jpg}");
     * String modelId = "{custom_trained_model_id}";
     * byte[] fileContent = Files.readAllBytes(document.toPath());
     *
     * try (InputStream targetStream = new ByteArrayInputStream(fileContent)) {
     *     documentAnalysisClient.beginAnalyzeDocument(modelId, targetStream, document.length(),
     *         new AnalyzeDocumentOptions().setPages(Arrays.asList("1", "3")), Context.NONE)
     *         .getFinalResult()
     *         .getDocuments().stream()
     *         .map(AnalyzedDocument::getFields)
     *         .forEach(documentFieldMap -> documentFieldMap.forEach((key, documentField) -> {
     *             System.out.printf("Field text: %s%n", key);
     *             System.out.printf("Field value data content: %s%n", documentField.getContent());
     *             System.out.printf("Confidence score: %.2f%n", documentField.getConfidence());
     *         }));
     * }
     * </pre>
     * <!-- end com.azure.ai.formrecognizer.DocumentAnalysisClient.beginAnalyzeDocument#string-InputStream-long-AnalyzeDocumentOptions-Context -->
     *
     * @param modelId The unique model ID to be used. Use this to specify the custom model ID or prebuilt model ID.
     * Prebuilt model IDs supported can be found <a href="https://aka.ms/azsdk/formrecognizer/models">here</a>
     * @param document The data of the document to analyze information from.
     * @param length The exact length of the data.
     * @param analyzeDocumentOptions The additional configurable {@link AnalyzeDocumentOptions options} that may be
     * passed when analyzing documents.
     * @param context Additional context that is passed through the HTTP pipeline during the service call.
     *
     * @return A {@link SyncPoller} to poll the progress of the analyze document operation until it has completed,
     * has failed, or has been cancelled. The completed operation returns an {@link AnalyzeResult}.
     * @throws DocumentModelOperationException If analyze operation fails and the {@link AnalyzeResultOperation}
     * returns with an {@link OperationStatus#FAILED}.
     * @throws IllegalArgumentException If {@code document} or {@code modelId} is null.
     */
    @ServiceMethod(returns = ReturnType.LONG_RUNNING_OPERATION)
    public SyncPoller<DocumentOperationResult, AnalyzeResult> beginAnalyzeDocument(String modelId,
        InputStream document, long length, AnalyzeDocumentOptions analyzeDocumentOptions, Context context) {
        // The async client takes the document as a Flux of ByteBuffer, so adapt the stream before delegating.
        final Flux<ByteBuffer> documentData = toFluxByteBuffer(document);
        return this.client
            .beginAnalyzeDocument(modelId, documentData, length, analyzeDocumentOptions, context)
            .getSyncPoller();
    }
}