/
TextAnalyticsClient.java
3766 lines (3708 loc) · 243 KB
/
TextAnalyticsClient.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.azure.ai.textanalytics;
import com.azure.ai.textanalytics.models.AbstractiveSummaryOperationDetail;
import com.azure.ai.textanalytics.models.AbstractiveSummaryOptions;
import com.azure.ai.textanalytics.models.AnalyzeActionsOperationDetail;
import com.azure.ai.textanalytics.models.AnalyzeActionsOptions;
import com.azure.ai.textanalytics.models.AnalyzeHealthcareEntitiesAction;
import com.azure.ai.textanalytics.models.AnalyzeHealthcareEntitiesOperationDetail;
import com.azure.ai.textanalytics.models.AnalyzeHealthcareEntitiesOptions;
import com.azure.ai.textanalytics.models.AnalyzeSentimentOptions;
import com.azure.ai.textanalytics.models.AnalyzeSentimentResult;
import com.azure.ai.textanalytics.models.CategorizedEntity;
import com.azure.ai.textanalytics.models.CategorizedEntityCollection;
import com.azure.ai.textanalytics.models.ClassifyDocumentOperationDetail;
import com.azure.ai.textanalytics.models.DetectLanguageInput;
import com.azure.ai.textanalytics.models.DetectLanguageResult;
import com.azure.ai.textanalytics.models.DetectedLanguage;
import com.azure.ai.textanalytics.models.DocumentSentiment;
import com.azure.ai.textanalytics.models.ExtractKeyPhraseResult;
import com.azure.ai.textanalytics.models.ExtractiveSummaryOperationDetail;
import com.azure.ai.textanalytics.models.ExtractiveSummaryOptions;
import com.azure.ai.textanalytics.models.KeyPhrasesCollection;
import com.azure.ai.textanalytics.models.LinkedEntity;
import com.azure.ai.textanalytics.models.LinkedEntityCollection;
import com.azure.ai.textanalytics.models.MultiLabelClassifyAction;
import com.azure.ai.textanalytics.models.MultiLabelClassifyOptions;
import com.azure.ai.textanalytics.models.PiiEntityCollection;
import com.azure.ai.textanalytics.models.RecognizeCustomEntitiesAction;
import com.azure.ai.textanalytics.models.RecognizeCustomEntitiesOperationDetail;
import com.azure.ai.textanalytics.models.RecognizeCustomEntitiesOptions;
import com.azure.ai.textanalytics.models.RecognizeEntitiesResult;
import com.azure.ai.textanalytics.models.RecognizeLinkedEntitiesResult;
import com.azure.ai.textanalytics.models.RecognizePiiEntitiesOptions;
import com.azure.ai.textanalytics.models.RecognizePiiEntitiesResult;
import com.azure.ai.textanalytics.models.SingleLabelClassifyAction;
import com.azure.ai.textanalytics.models.SingleLabelClassifyOptions;
import com.azure.ai.textanalytics.models.TextAnalyticsActions;
import com.azure.ai.textanalytics.models.TextAnalyticsError;
import com.azure.ai.textanalytics.models.TextAnalyticsException;
import com.azure.ai.textanalytics.models.TextAnalyticsRequestOptions;
import com.azure.ai.textanalytics.models.TextDocumentInput;
import com.azure.ai.textanalytics.util.AbstractiveSummaryPagedIterable;
import com.azure.ai.textanalytics.util.AbstractiveSummaryResultCollection;
import com.azure.ai.textanalytics.util.AnalyzeActionsResultPagedIterable;
import com.azure.ai.textanalytics.util.AnalyzeHealthcareEntitiesPagedIterable;
import com.azure.ai.textanalytics.util.AnalyzeHealthcareEntitiesResultCollection;
import com.azure.ai.textanalytics.util.AnalyzeSentimentResultCollection;
import com.azure.ai.textanalytics.util.ClassifyDocumentPagedIterable;
import com.azure.ai.textanalytics.util.ClassifyDocumentResultCollection;
import com.azure.ai.textanalytics.util.DetectLanguageResultCollection;
import com.azure.ai.textanalytics.util.ExtractKeyPhrasesResultCollection;
import com.azure.ai.textanalytics.util.ExtractiveSummaryPagedIterable;
import com.azure.ai.textanalytics.util.ExtractiveSummaryResultCollection;
import com.azure.ai.textanalytics.util.RecognizeCustomEntitiesPagedIterable;
import com.azure.ai.textanalytics.util.RecognizeCustomEntitiesResultCollection;
import com.azure.ai.textanalytics.util.RecognizeEntitiesResultCollection;
import com.azure.ai.textanalytics.util.RecognizeLinkedEntitiesResultCollection;
import com.azure.ai.textanalytics.util.RecognizePiiEntitiesResultCollection;
import com.azure.core.annotation.ReturnType;
import com.azure.core.annotation.ServiceClient;
import com.azure.core.annotation.ServiceMethod;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.credential.TokenCredential;
import com.azure.core.http.rest.PagedIterable;
import com.azure.core.http.rest.Response;
import com.azure.core.util.Context;
import com.azure.core.util.logging.ClientLogger;
import com.azure.core.util.polling.SyncPoller;
import java.util.Collections;
import java.util.Objects;
import static com.azure.ai.textanalytics.implementation.Utility.inputDocumentsValidation;
import static com.azure.ai.textanalytics.implementation.Utility.mapByIndex;
import static com.azure.ai.textanalytics.implementation.Utility.toTextAnalyticsException;
/**
* This class provides a synchronous client that contains all the operations that apply to Azure Text Analytics.
* Operations allowed by the client are language detection, entities recognition, linked entities recognition,
* key phrases extraction, and sentiment analysis of a document or a list of documents.
*
* <h2>Getting Started</h2>
*
* <p>In order to interact with the Text Analytics features in Azure AI Language Service, you'll need to create an
* instance of the {@link TextAnalyticsClient}. To make this possible you'll need the key credential of the service.
* Alternatively, you can use AAD authentication via
* <a href="https://learn.microsoft.com/java/api/overview/azure/identity-readme?view=azure-java-stable">Azure Identity</a>
* to connect to the service.</p>
* <ol>
* <li>Azure Key Credential, see {@link TextAnalyticsClientBuilder#credential(AzureKeyCredential) AzureKeyCredential}.</li>
* <li>Azure Active Directory, see {@link TextAnalyticsClientBuilder#credential(TokenCredential) TokenCredential}.</li>
* </ol>
*
* <p><strong>Sample: Construct Synchronous Text Analytics Client with Azure Key Credential</strong></p>
*
* <p>The following code sample demonstrates the creation of a {@link TextAnalyticsClient},
* using the {@link TextAnalyticsClientBuilder} to configure it with a key credential.</p>
*
* <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.instantiation -->
* <pre>
* TextAnalyticsClient textAnalyticsClient = new TextAnalyticsClientBuilder()
* .credential(new AzureKeyCredential("{key}"))
* .endpoint("{endpoint}")
* .buildClient();
* </pre>
* <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.instantiation -->
*
* <p>View {@link TextAnalyticsClientBuilder TextAnalyticsClientBuilder} for additional ways to construct the client.</p>
*
* <p>See methods in client level class below to explore all features that library provides.</p>
*
* <br/>
*
* <hr/>
*
* <h2>Extract information</h2>
*
* <p>Text Analytics client can use Natural Language Understanding (NLU) to extract information from unstructured text.
 * For example, identify key phrases or Personally Identifiable Information (PII) entities, etc. Below you can look at the samples on how to use it.</p>
*
* <h3>Key Phrases Extraction</h3>
*
* <p>The {@link TextAnalyticsClient#extractKeyPhrases(String) extractKeyPhrases}
* method can be used to extract key phrases, which returns a list of strings denoting the key phrases in the document.
* </p>
*
* <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.extractKeyPhrases#String -->
* <pre>
* KeyPhrasesCollection extractedKeyPhrases =
* textAnalyticsClient.extractKeyPhrases("My cat might need to see a veterinarian.");
* for (String keyPhrase : extractedKeyPhrases) {
* System.out.printf("%s.%n", keyPhrase);
* }
* </pre>
* <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.extractKeyPhrases#String -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* <h3>Named Entities Recognition(NER): Prebuilt Model</h3>
*
* <p>The {@link TextAnalyticsClient#recognizeEntities(String) recognizeEntities} method can be used to recognize
* entities, which returns a list of general categorized entities in the provided document.</p>
*
* <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.recognizeCategorizedEntities#String -->
* <pre>
* CategorizedEntityCollection recognizeEntitiesResult =
* textAnalyticsClient.recognizeEntities("Satya Nadella is the CEO of Microsoft");
* for (CategorizedEntity entity : recognizeEntitiesResult) {
* System.out.printf("Recognized entity: %s, entity category: %s, confidence score: %f.%n",
* entity.getText(), entity.getCategory(), entity.getConfidenceScore());
* }
* </pre>
* <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.recognizeCategorizedEntities#String -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* <h3>Custom Named Entities Recognition(NER): Custom Model</h3>
*
* <p>The {@link TextAnalyticsClient#beginRecognizeCustomEntities(Iterable, String, String)} method can be used to
 * recognize custom entities, which returns a list of custom entities for the provided list of documents.</p>
*
* <!-- src_embed Client.beginRecognizeCustomEntities#Iterable-String-String -->
* <pre>
* List<String> documents = new ArrayList<>();
* for (int i = 0; i < 3; i++) {
* documents.add(
* "A recent report by the Government Accountability Office (GAO) found that the dramatic increase "
* + "in oil and natural gas development on federal lands over the past six years has stretched the"
* + " staff of the BLM to a point that it has been unable to meet its environmental protection "
* + "responsibilities."); }
* SyncPoller<RecognizeCustomEntitiesOperationDetail, RecognizeCustomEntitiesPagedIterable> syncPoller =
* textAnalyticsClient.beginRecognizeCustomEntities(documents, "{project_name}", "{deployment_name}");
* syncPoller.waitForCompletion();
* syncPoller.getFinalResult().forEach(documentsResults -> {
* System.out.printf("Project name: %s, deployment name: %s.%n",
* documentsResults.getProjectName(), documentsResults.getDeploymentName());
* for (RecognizeEntitiesResult documentResult : documentsResults) {
* System.out.println("Document ID: " + documentResult.getId());
* for (CategorizedEntity entity : documentResult.getEntities()) {
* System.out.printf(
* "\tText: %s, category: %s, confidence score: %f.%n",
* entity.getText(), entity.getCategory(), entity.getConfidenceScore());
* }
* }
* });
* </pre>
* <!-- end Client.beginRecognizeCustomEntities#Iterable-String-String -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* <h3>Linked Entities Recognition</h3>
*
* <p>The {@link TextAnalyticsClient#recognizeLinkedEntities(String) recognizeLinkedEntities} method can be used to
* find linked entities, which returns a list of recognized entities with links to a well-known knowledge base for
* the provided document.</p>
*
* <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.recognizeLinkedEntities#String -->
* <pre>
* String document = "Old Faithful is a geyser at Yellowstone Park.";
* System.out.println("Linked Entities:");
* textAnalyticsClient.recognizeLinkedEntities(document).forEach(linkedEntity -> {
* System.out.printf("Name: %s, entity ID in data source: %s, URL: %s, data source: %s.%n",
* linkedEntity.getName(), linkedEntity.getDataSourceEntityId(), linkedEntity.getUrl(),
* linkedEntity.getDataSource());
* linkedEntity.getMatches().forEach(entityMatch -> System.out.printf(
* "Matched entity: %s, confidence score: %f.%n",
* entityMatch.getText(), entityMatch.getConfidenceScore()));
* });
* </pre>
* <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.recognizeLinkedEntities#String -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* <h3>Personally Identifiable Information(PII) Entities Recognition</h3>
*
* <p>The {@link TextAnalyticsClient#recognizePiiEntities(String) recognizePiiEntities}
* method can be used to recognize PII entities, which returns a list of Personally Identifiable Information(PII)
* entities in the provided document.</p>
*
* <p>For a list of supported entity types, check: <a href="https://aka.ms/azsdk/language/pii">this</a>.</p>
*
* <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.recognizePiiEntities#String -->
* <pre>
* PiiEntityCollection piiEntityCollection = textAnalyticsClient.recognizePiiEntities("My SSN is 859-98-0987");
* System.out.printf("Redacted Text: %s%n", piiEntityCollection.getRedactedText());
* for (PiiEntity entity : piiEntityCollection) {
* System.out.printf(
* "Recognized Personally Identifiable Information entity: %s, entity category: %s,"
* + " entity subcategory: %s, confidence score: %f.%n",
* entity.getText(), entity.getCategory(), entity.getSubcategory(), entity.getConfidenceScore());
* }
* </pre>
* <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.recognizePiiEntities#String -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* <h3>Text Analytics for Health: Prebuilt Model</h3>
*
* <p>The {@link TextAnalyticsClient#beginAnalyzeHealthcareEntities(Iterable) beginAnalyzeHealthcareEntities} method
* can be used to analyze healthcare entities, entity data sources, and entity relations in a list of documents.</p>
*
* <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.beginAnalyzeHealthcareEntities#Iterable -->
* <pre>
* List<String> documents = new ArrayList<>();
* for (int i = 0; i < 3; i++) {
* documents.add("The patient is a 54-year-old gentleman with a history of progressive angina over "
* + "the past several months.");
* }
*
* SyncPoller<AnalyzeHealthcareEntitiesOperationDetail, AnalyzeHealthcareEntitiesPagedIterable>
* syncPoller = textAnalyticsClient.beginAnalyzeHealthcareEntities(documents);
*
* syncPoller.waitForCompletion();
* AnalyzeHealthcareEntitiesPagedIterable result = syncPoller.getFinalResult();
*
* result.forEach(analyzeHealthcareEntitiesResultCollection -> {
* analyzeHealthcareEntitiesResultCollection.forEach(healthcareEntitiesResult -> {
* System.out.println("document id = " + healthcareEntitiesResult.getId());
* System.out.println("Document entities: ");
* AtomicInteger ct = new AtomicInteger();
* healthcareEntitiesResult.getEntities().forEach(healthcareEntity -> {
* System.out.printf("\ti = %d, Text: %s, category: %s, confidence score: %f.%n",
* ct.getAndIncrement(), healthcareEntity.getText(), healthcareEntity.getCategory(),
* healthcareEntity.getConfidenceScore());
*
* IterableStream<EntityDataSource> healthcareEntityDataSources =
* healthcareEntity.getDataSources();
* if (healthcareEntityDataSources != null) {
* healthcareEntityDataSources.forEach(healthcareEntityLink -> System.out.printf(
* "\t\tEntity ID in data source: %s, data source: %s.%n",
* healthcareEntityLink.getEntityId(), healthcareEntityLink.getName()));
* }
* });
* // Healthcare entity relation groups
* healthcareEntitiesResult.getEntityRelations().forEach(entityRelation -> {
* System.out.printf("\tRelation type: %s.%n", entityRelation.getRelationType());
* entityRelation.getRoles().forEach(role -> {
* final HealthcareEntity entity = role.getEntity();
* System.out.printf("\t\tEntity text: %s, category: %s, role: %s.%n",
* entity.getText(), entity.getCategory(), role.getName());
* });
* System.out.printf("\tRelation confidence score: %f.%n",
* entityRelation.getConfidenceScore());
* });
* });
* });
* </pre>
* <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.beginAnalyzeHealthcareEntities#Iterable -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* <br/>
*
* <hr/>
*
* <h2>Summarize text-based content: Document Summarization</h2>
*
* <p>Text Analytics client can use Natural Language Understanding (NLU) to summarize lengthy documents.
* For example, extractive or abstractive summarization. Below you can look at the samples on how to use it.</p>
*
* <h3>Extractive summarization</h3>
*
* <p>The {@link TextAnalyticsClient#beginExtractSummary(Iterable) beginExtractSummary}
 * method returns a list of extractive summaries for the provided list of documents.</p>
*
* <p>This method is supported since service API version {@link TextAnalyticsServiceVersion#V2023_04_01}.</p>
*
* <!-- src_embed Client.beginExtractSummary#Iterable -->
* <pre>
* List<String> documents = new ArrayList<>();
* for (int i = 0; i < 3; i++) {
* documents.add(
* "At Microsoft, we have been on a quest to advance AI beyond existing techniques, by taking a more holistic,"
* + " human-centric approach to learning and understanding. As Chief Technology Officer of Azure AI"
* + " Cognitive Services, I have been working with a team of amazing scientists and engineers to turn "
* + "this quest into a reality. In my role, I enjoy a unique perspective in viewing the relationship"
* + " among three attributes of human cognition: monolingual text (X), audio or visual sensory signals,"
* + " (Y) and multilingual (Z). At the intersection of all three, there’s magic—what we call XYZ-code"
* + " as illustrated in Figure 1—a joint representation to create more powerful AI that can speak, hear,"
* + " see, and understand humans better. We believe XYZ-code will enable us to fulfill our long-term"
* + " vision: cross-domain transfer learning, spanning modalities and languages. The goal is to have"
* + " pretrained models that can jointly learn representations to support a broad range of downstream"
* + " AI tasks, much in the way humans do today. Over the past five years, we have achieved human"
* + " performance on benchmarks in conversational speech recognition, machine translation, "
* + "conversational question answering, machine reading comprehension, and image captioning. These"
* + " five breakthroughs provided us with strong signals toward our more ambitious aspiration to"
* + " produce a leap in AI capabilities, achieving multisensory and multilingual learning that "
* + "is closer in line with how humans learn and understand. I believe the joint XYZ-code is a "
* + "foundational component of this aspiration, if grounded with external knowledge sources in "
* + "the downstream AI tasks.");
* }
* SyncPoller<ExtractiveSummaryOperationDetail, ExtractiveSummaryPagedIterable> syncPoller =
* textAnalyticsClient.beginExtractSummary(documents);
* syncPoller.waitForCompletion();
* syncPoller.getFinalResult().forEach(resultCollection -> {
* for (ExtractiveSummaryResult documentResult : resultCollection) {
* System.out.println("\tExtracted summary sentences:");
* for (ExtractiveSummarySentence extractiveSummarySentence : documentResult.getSentences()) {
* System.out.printf(
* "\t\t Sentence text: %s, length: %d, offset: %d, rank score: %f.%n",
* extractiveSummarySentence.getText(), extractiveSummarySentence.getLength(),
* extractiveSummarySentence.getOffset(), extractiveSummarySentence.getRankScore());
* }
* }
* });
* </pre>
* <!-- end Client.beginExtractSummary#Iterable -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* <h3>Abstractive summarization</h3>
* <p>The {@link TextAnalyticsClient#beginAbstractSummary(Iterable) beginAbstractSummary}
 * method returns a list of abstractive summaries for the provided list of documents.</p>
*
* <p>This method is supported since service API version {@link TextAnalyticsServiceVersion#V2023_04_01}.</p>
*
* <!-- src_embed Client.beginAbstractSummary#Iterable -->
* <pre>
* List<String> documents = new ArrayList<>();
* for (int i = 0; i < 3; i++) {
* documents.add(
* "At Microsoft, we have been on a quest to advance AI beyond existing techniques, by taking a more holistic,"
* + " human-centric approach to learning and understanding. As Chief Technology Officer of Azure AI"
* + " Cognitive Services, I have been working with a team of amazing scientists and engineers to turn "
* + "this quest into a reality. In my role, I enjoy a unique perspective in viewing the relationship"
* + " among three attributes of human cognition: monolingual text (X), audio or visual sensory signals,"
* + " (Y) and multilingual (Z). At the intersection of all three, there’s magic—what we call XYZ-code"
* + " as illustrated in Figure 1—a joint representation to create more powerful AI that can speak, hear,"
* + " see, and understand humans better. We believe XYZ-code will enable us to fulfill our long-term"
* + " vision: cross-domain transfer learning, spanning modalities and languages. The goal is to have"
* + " pretrained models that can jointly learn representations to support a broad range of downstream"
* + " AI tasks, much in the way humans do today. Over the past five years, we have achieved human"
* + " performance on benchmarks in conversational speech recognition, machine translation, "
* + "conversational question answering, machine reading comprehension, and image captioning. These"
* + " five breakthroughs provided us with strong signals toward our more ambitious aspiration to"
* + " produce a leap in AI capabilities, achieving multisensory and multilingual learning that "
* + "is closer in line with how humans learn and understand. I believe the joint XYZ-code is a "
* + "foundational component of this aspiration, if grounded with external knowledge sources in "
* + "the downstream AI tasks.");
* }
* SyncPoller<AbstractiveSummaryOperationDetail, AbstractiveSummaryPagedIterable> syncPoller =
* textAnalyticsClient.beginAbstractSummary(documents);
* syncPoller.waitForCompletion();
* syncPoller.getFinalResult().forEach(resultCollection -> {
* for (AbstractiveSummaryResult documentResult : resultCollection) {
* System.out.println("\tAbstractive summary sentences:");
* for (AbstractiveSummary summarySentence : documentResult.getSummaries()) {
* System.out.printf("\t\t Summary text: %s.%n", summarySentence.getText());
* for (AbstractiveSummaryContext abstractiveSummaryContext : summarySentence.getContexts()) {
* System.out.printf("\t\t offset: %d, length: %d%n",
* abstractiveSummaryContext.getOffset(), abstractiveSummaryContext.getLength());
* }
* }
* }
* });
* </pre>
* <!-- end Client.beginAbstractSummary#Iterable -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* <br/>
*
* <hr/>
*
* <h2>Classify Text</h2>
*
* <p>Text Analytics client can use Natural Language Understanding (NLU) to detect the language or
* classify the sentiment of text you have. For example, language detection, sentiment analysis, or
* custom text classification. Below you can look at the samples on how to use it.</p>
*
* <h3>Analyze Sentiment and Mine Text for Opinions</h3>
*
 * <p>The {@link TextAnalyticsClient#analyzeSentiment(String, String, AnalyzeSentimentOptions) analyzeSentiment}
* method can be used to analyze sentiment on a given input text string, which returns a sentiment prediction,
* as well as confidence scores for each sentiment label (Positive, Negative, and Neutral) for the document and each
 * sentence within it. If the {@code includeOpinionMining} of {@link AnalyzeSentimentOptions} is set to true,
* the output will include the opinion mining results. It mines the opinions of a sentence and conducts more granular
* analysis around the aspects in the text (also known as aspect-based sentiment analysis).</p>
*
* <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.analyzeSentiment#String-String-AnalyzeSentimentOptions -->
* <pre>
* DocumentSentiment documentSentiment = textAnalyticsClient.analyzeSentiment(
* "The hotel was dark and unclean.", "en",
* new AnalyzeSentimentOptions().setIncludeOpinionMining(true));
* for (SentenceSentiment sentenceSentiment : documentSentiment.getSentences()) {
* System.out.printf("\tSentence sentiment: %s%n", sentenceSentiment.getSentiment());
* sentenceSentiment.getOpinions().forEach(opinion -> {
* TargetSentiment targetSentiment = opinion.getTarget();
* System.out.printf("\tTarget sentiment: %s, target text: %s%n", targetSentiment.getSentiment(),
* targetSentiment.getText());
* for (AssessmentSentiment assessmentSentiment : opinion.getAssessments()) {
* System.out.printf("\t\t'%s' sentiment because of \"%s\". Is the assessment negated: %s.%n",
* assessmentSentiment.getSentiment(), assessmentSentiment.getText(), assessmentSentiment.isNegated());
* }
* });
* }
* </pre>
* <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.analyzeSentiment#String-String-AnalyzeSentimentOptions -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* <h3>Detect Language</h3>
*
* <p>The {@link TextAnalyticsClient#detectLanguage(String) detectLanguage}
* method returns the detected language and a confidence score between zero and one. Scores close to one indicate 100%
* certainty that the identified language is true.</p>
*
 * <p>This method will use the default country hint that is set up in
 * {@link TextAnalyticsClientBuilder#defaultCountryHint(String)}. If none is specified, the service will use 'US' as the
 * country hint.</p>
*
* <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.detectLanguage#String -->
* <pre>
* DetectedLanguage detectedLanguage = textAnalyticsClient.detectLanguage("Bonjour tout le monde");
* System.out.printf("Detected language name: %s, ISO 6391 name: %s, confidence score: %f.%n",
* detectedLanguage.getName(), detectedLanguage.getIso6391Name(), detectedLanguage.getConfidenceScore());
* </pre>
* <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.detectLanguage#String -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* <h3>Single-Label Classification</h3>
* <p>The {@link TextAnalyticsClient#beginSingleLabelClassify(Iterable, String, String) beginSingleLabelClassify}
 * method returns a list of single-label classifications for the provided list of documents.</p>
*
* <p><strong>Note:</strong> this method is supported since service API version {@link TextAnalyticsServiceVersion#V2022_05_01}.</p>
*
* <!-- src_embed Client.beginSingleLabelClassify#Iterable-String-String -->
* <pre>
* List<String> documents = new ArrayList<>();
* for (int i = 0; i < 3; i++) {
* documents.add(
* "A recent report by the Government Accountability Office (GAO) found that the dramatic increase "
* + "in oil and natural gas development on federal lands over the past six years has stretched the"
* + " staff of the BLM to a point that it has been unable to meet its environmental protection "
* + "responsibilities."
* );
* }
* // See the service documentation for regional support and how to train a model to classify your documents,
* // see https://aka.ms/azsdk/textanalytics/customfunctionalities
* SyncPoller<ClassifyDocumentOperationDetail, ClassifyDocumentPagedIterable> syncPoller =
* textAnalyticsClient.beginSingleLabelClassify(documents, "{project_name}", "{deployment_name}");
* syncPoller.waitForCompletion();
* syncPoller.getFinalResult().forEach(documentsResults -> {
* System.out.printf("Project name: %s, deployment name: %s.%n",
* documentsResults.getProjectName(), documentsResults.getDeploymentName());
* for (ClassifyDocumentResult documentResult : documentsResults) {
* System.out.println("Document ID: " + documentResult.getId());
* for (ClassificationCategory classification : documentResult.getClassifications()) {
* System.out.printf("\tCategory: %s, confidence score: %f.%n",
* classification.getCategory(), classification.getConfidenceScore());
* }
* }
* });
* </pre>
* <!-- end Client.beginSingleLabelClassify#Iterable-String-String -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* <h3>Multi-Label Classification</h3>
*
* <p>The {@link TextAnalyticsClient#beginMultiLabelClassify(Iterable, String, String) beginMultiLabelClassify}
 * method returns a list of multi-label classifications for the provided list of documents.</p>
*
* <p><strong>Note:</strong> this method is supported since service API version {@link TextAnalyticsServiceVersion#V2022_05_01}.</p>
*
* <!-- src_embed Client.beginMultiLabelClassify#Iterable-String-String -->
* <pre>
* List<String> documents = new ArrayList<>();
* for (int i = 0; i < 3; i++) {
* documents.add(
* "I need a reservation for an indoor restaurant in China. Please don't stop the music."
* + " Play music and add it to my playlist");
* }
* SyncPoller<ClassifyDocumentOperationDetail, ClassifyDocumentPagedIterable> syncPoller =
* textAnalyticsClient.beginMultiLabelClassify(documents, "{project_name}", "{deployment_name}");
* syncPoller.waitForCompletion();
* syncPoller.getFinalResult().forEach(documentsResults -> {
* System.out.printf("Project name: %s, deployment name: %s.%n",
* documentsResults.getProjectName(), documentsResults.getDeploymentName());
* for (ClassifyDocumentResult documentResult : documentsResults) {
* System.out.println("Document ID: " + documentResult.getId());
* for (ClassificationCategory classification : documentResult.getClassifications()) {
* System.out.printf("\tCategory: %s, confidence score: %f.%n",
* classification.getCategory(), classification.getConfidenceScore());
* }
* }
* });
* </pre>
* <!-- end Client.beginMultiLabelClassify#Iterable-String-String -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* <br/>
*
* <hr/>
*
* <h2>Execute multiple actions</h2>
*
 * <p>The {@link TextAnalyticsClient#beginAnalyzeActions(Iterable, TextAnalyticsActions) beginAnalyzeActions} method
 * executes multiple actions, such as entities recognition, PII entities recognition, and key phrases extraction, for
 * a list of documents.</p>
*
* <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.beginAnalyzeActions#Iterable-TextAnalyticsActions -->
* <pre>
* List<String> documents = Arrays.asList(
* "Elon Musk is the CEO of SpaceX and Tesla.",
* "My SSN is 859-98-0987"
* );
*
* SyncPoller<AnalyzeActionsOperationDetail, AnalyzeActionsResultPagedIterable> syncPoller =
* textAnalyticsClient.beginAnalyzeActions(
* documents,
* new TextAnalyticsActions().setDisplayName("{tasks_display_name}")
* .setRecognizeEntitiesActions(new RecognizeEntitiesAction())
* .setExtractKeyPhrasesActions(new ExtractKeyPhrasesAction()));
* syncPoller.waitForCompletion();
* AnalyzeActionsResultPagedIterable result = syncPoller.getFinalResult();
* result.forEach(analyzeActionsResult -> {
* System.out.println("Entities recognition action results:");
* analyzeActionsResult.getRecognizeEntitiesResults().forEach(
* actionResult -> {
* if (!actionResult.isError()) {
* actionResult.getDocumentsResults().forEach(
* entitiesResult -> entitiesResult.getEntities().forEach(
* entity -> System.out.printf(
* "Recognized entity: %s, entity category: %s, entity subcategory: %s,"
* + " confidence score: %f.%n",
* entity.getText(), entity.getCategory(), entity.getSubcategory(),
* entity.getConfidenceScore())));
* }
* });
* System.out.println("Key phrases extraction action results:");
* analyzeActionsResult.getExtractKeyPhrasesResults().forEach(
* actionResult -> {
* if (!actionResult.isError()) {
* actionResult.getDocumentsResults().forEach(extractKeyPhraseResult -> {
* System.out.println("Extracted phrases:");
* extractKeyPhraseResult.getKeyPhrases()
* .forEach(keyPhrases -> System.out.printf("\t%s.%n", keyPhrases));
* });
* }
* });
* });
* </pre>
* <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.beginAnalyzeActions#Iterable-TextAnalyticsActions -->
*
* <p>See <a href="https://aka.ms/talangs">this</a> for supported languages in Text Analytics API.</p>
*
* <p><strong>Note:</strong> For asynchronous sample, refer to {@link TextAnalyticsAsyncClient}.</p>
*
* @see com.azure.ai.textanalytics
* @see TextAnalyticsClientBuilder
*/
@ServiceClient(builder = TextAnalyticsClientBuilder.class)
public final class TextAnalyticsClient {
// Logger used to surface per-document service errors as exceptions to the caller.
private static final ClientLogger LOGGER = new ClientLogger(TextAnalyticsClient.class);
// The async client that every synchronous operation in this class delegates to.
private final TextAnalyticsAsyncClient client;
/**
 * Creates a {@code TextAnalyticsClient} that sends requests to the Text Analytics service's endpoint.
 * Each service call goes through the {@link TextAnalyticsClientBuilder#pipeline http pipeline}.
 *
 * @param client The {@link TextAnalyticsAsyncClient} that this client routes its requests through.
 */
TextAnalyticsClient(TextAnalyticsAsyncClient client) {
    this.client = client;
}
/**
 * Returns the default country hint code configured on this client.
 *
 * @return The default country hint code.
 */
public String getDefaultCountryHint() {
    return client.getDefaultCountryHint();
}
/**
 * Returns the default language that was set up when the client was built.
 *
 * @return The default language.
 */
public String getDefaultLanguage() {
    return client.getDefaultLanguage();
}
/**
 * Returns the detected language and a confidence score between zero and one. Scores close to one indicate 100%
 * certainty that the identified language is true.
 *
 * This method uses the default country hint that is set up in
 * {@link TextAnalyticsClientBuilder#defaultCountryHint(String)}. If none is specified, the service will use 'US'
 * as the country hint.
 *
 * <p><strong>Code Sample</strong></p>
 * <p>Detects the language of single document.</p>
 * <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.detectLanguage#String -->
 * <pre>
 * DetectedLanguage detectedLanguage = textAnalyticsClient.detectLanguage("Bonjour tout le monde");
 * System.out.printf("Detected language name: %s, ISO 6391 name: %s, confidence score: %f.%n",
 * detectedLanguage.getName(), detectedLanguage.getIso6391Name(), detectedLanguage.getConfidenceScore());
 * </pre>
 * <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.detectLanguage#String -->
 *
 * @param document The document to be analyzed.
 * For text length limits, maximum batch size, and supported text encoding, see
 * <a href="https://aka.ms/azsdk/textanalytics/data-limits">data limits</a>.
 *
 * @return The {@link DetectedLanguage detected language} of the document.
 *
 * @throws NullPointerException if {@code document} is null.
 */
@ServiceMethod(returns = ReturnType.SINGLE)
public DetectedLanguage detectLanguage(String document) {
    // Delegate to the two-argument overload using the builder-configured country hint.
    final String defaultCountryHint = client.getDefaultCountryHint();
    return detectLanguage(document, defaultCountryHint);
}
/**
 * Returns the detected language and a confidence score between zero and one.
 * Scores close to one indicate 100% certainty that the identified language is true.
 *
 * <p><strong>Code Sample</strong></p>
 * <p>Detects the language of documents with a provided country hint.</p>
 * <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.detectLanguage#String-String -->
 * <pre>
 * DetectedLanguage detectedLanguage = textAnalyticsClient.detectLanguage(
 * "This text is in English", "US");
 * System.out.printf("Detected language name: %s, ISO 6391 name: %s, confidence score: %f.%n",
 * detectedLanguage.getName(), detectedLanguage.getIso6391Name(), detectedLanguage.getConfidenceScore());
 * </pre>
 * <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.detectLanguage#String-String -->
 *
 * @param document The document to be analyzed.
 * For text length limits, maximum batch size, and supported text encoding, see
 * <a href="https://aka.ms/azsdk/textanalytics/data-limits">data limits</a>.
 * @param countryHint Accepts two letter country codes specified by ISO 3166-1 alpha-2. Defaults to "US" if not
 * specified. To remove this behavior you can reset this parameter by setting this value to empty string
 * {@code countryHint} = "" or "none".
 *
 * @return The {@link DetectedLanguage detected language} of the document.
 *
 * @throws NullPointerException if {@code document} is null.
 */
@ServiceMethod(returns = ReturnType.SINGLE)
public DetectedLanguage detectLanguage(String document, String countryHint) {
    Objects.requireNonNull(document, "'document' cannot be null.");
    // Reuse the batch API with a one-element list; the returned collection holds at most one entry.
    final DetectLanguageResultCollection results =
        detectLanguageBatch(Collections.singletonList(document), countryHint, null);
    DetectedLanguage primaryLanguage = null;
    for (DetectLanguageResult result : results) {
        if (result.isError()) {
            throw LOGGER.logExceptionAsError(toTextAnalyticsException(result.getError()));
        }
        primaryLanguage = result.getPrimaryLanguage();
    }
    // If the service returned an empty collection, the result is null.
    return primaryLanguage;
}
/**
 * Detects the language for a batch of documents with the provided country hint and request options.
 *
 * <p><strong>Code Sample</strong></p>
 * <p>Detects the language in a list of documents with a provided country hint and request options.</p>
 * <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.detectLanguageBatch#Iterable-String-TextAnalyticsRequestOptions -->
 * <pre>
 * List<String> documents = Arrays.asList(
 * "This is written in English",
 * "Este es un documento escrito en Español."
 * );
 *
 * DetectLanguageResultCollection resultCollection =
 * textAnalyticsClient.detectLanguageBatch(documents, "US", null);
 *
 * // Batch statistics
 * TextDocumentBatchStatistics batchStatistics = resultCollection.getStatistics();
 * System.out.printf("A batch of documents statistics, transaction count: %s, valid document count: %s.%n",
 * batchStatistics.getTransactionCount(), batchStatistics.getValidDocumentCount());
 *
 * // Batch result of languages
 * resultCollection.forEach(detectLanguageResult -> {
 * System.out.printf("Document ID: %s%n", detectLanguageResult.getId());
 * DetectedLanguage detectedLanguage = detectLanguageResult.getPrimaryLanguage();
 * System.out.printf("Primary language name: %s, ISO 6391 name: %s, confidence score: %f.%n",
 * detectedLanguage.getName(), detectedLanguage.getIso6391Name(),
 * detectedLanguage.getConfidenceScore());
 * });
 * </pre>
 * <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.detectLanguageBatch#Iterable-String-TextAnalyticsRequestOptions -->
 *
 * @param documents The list of documents to detect languages for.
 * For text length limits, maximum batch size, and supported text encoding, see
 * <a href="https://aka.ms/azsdk/textanalytics/data-limits">data limits</a>.
 * @param countryHint Accepts two letter country codes specified by ISO 3166-1 alpha-2. Defaults to "US" if not
 * specified. To remove this behavior you can reset this parameter by setting this value to empty string
 * {@code countryHint} = "" or "none".
 * @param options The {@link TextAnalyticsRequestOptions options} to configure the scoring model for documents
 * and show statistics.
 *
 * @return A {@link DetectLanguageResultCollection}.
 *
 * @throws NullPointerException if {@code documents} is null.
 * @throws IllegalArgumentException if {@code documents} is empty.
 * @throws UnsupportedOperationException if {@link TextAnalyticsRequestOptions#isServiceLogsDisabled()} is true in service
 * API version {@link TextAnalyticsServiceVersion#V3_0}. {@code disableServiceLogs} is only available for API
 * version v3.1 and newer.
 */
@ServiceMethod(returns = ReturnType.SINGLE)
public DetectLanguageResultCollection detectLanguageBatch(
    Iterable<String> documents, String countryHint, TextAnalyticsRequestOptions options) {
    inputDocumentsValidation(documents);
    // Assign each plain-string document a positional ID and attach the shared country hint.
    final Iterable<DetectLanguageInput> inputs =
        mapByIndex(documents, (id, text) -> new DetectLanguageInput(id, text, countryHint));
    return detectLanguageBatchWithResponse(inputs, options, Context.NONE).getValue();
}
/**
 * Detects the language for a batch of {@link DetectLanguageInput documents} with provided request options.
 *
 * <p><strong>Code Sample</strong></p>
 * <p>Detects the languages with http response in a list of {@link DetectLanguageInput document} with provided
 * request options.</p>
 * <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.detectLanguageBatch#Iterable-TextAnalyticsRequestOptions-Context -->
 * <pre>
 * List<DetectLanguageInput> detectLanguageInputs = Arrays.asList(
 * new DetectLanguageInput("1", "This is written in English.", "US"),
 * new DetectLanguageInput("2", "Este es un documento escrito en Español.", "es")
 * );
 *
 * Response<DetectLanguageResultCollection> response =
 * textAnalyticsClient.detectLanguageBatchWithResponse(detectLanguageInputs,
 * new TextAnalyticsRequestOptions().setIncludeStatistics(true), Context.NONE);
 *
 * // Response's status code
 * System.out.printf("Status code of request response: %d%n", response.getStatusCode());
 * DetectLanguageResultCollection detectedLanguageResultCollection = response.getValue();
 *
 * // Batch statistics
 * TextDocumentBatchStatistics batchStatistics = detectedLanguageResultCollection.getStatistics();
 * System.out.printf(
 * "Documents statistics: document count = %d, erroneous document count = %d, transaction count = %d,"
 * + " valid document count = %d.%n",
 * batchStatistics.getDocumentCount(), batchStatistics.getInvalidDocumentCount(),
 * batchStatistics.getTransactionCount(), batchStatistics.getValidDocumentCount());
 *
 * // Batch result of languages
 * detectedLanguageResultCollection.forEach(detectLanguageResult -> {
 * System.out.printf("Document ID: %s%n", detectLanguageResult.getId());
 * DetectedLanguage detectedLanguage = detectLanguageResult.getPrimaryLanguage();
 * System.out.printf("Primary language name: %s, ISO 6391 name: %s, confidence score: %f.%n",
 * detectedLanguage.getName(), detectedLanguage.getIso6391Name(),
 * detectedLanguage.getConfidenceScore());
 * });
 * </pre>
 * <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.detectLanguageBatch#Iterable-TextAnalyticsRequestOptions-Context -->
 *
 * @param documents The list of {@link DetectLanguageInput documents} to be analyzed.
 * For text length limits, maximum batch size, and supported text encoding, see
 * <a href="https://aka.ms/azsdk/textanalytics/data-limits">data limits</a>.
 * @param options The {@link TextAnalyticsRequestOptions options} to configure the scoring model for documents
 * and show statistics.
 * @param context Additional context that is passed through the Http pipeline during the service call.
 *
 * @return A {@link Response} that contains a {@link DetectLanguageResultCollection}.
 *
 * @throws NullPointerException if {@code documents} is null.
 * @throws IllegalArgumentException if {@code documents} is empty.
 * @throws UnsupportedOperationException if {@link TextAnalyticsRequestOptions#isServiceLogsDisabled()} is true in service
 * API version {@link TextAnalyticsServiceVersion#V3_0}. {@code disableServiceLogs} is only available for API
 * version v3.1 and newer.
 */
@ServiceMethod(returns = ReturnType.SINGLE)
public Response<DetectLanguageResultCollection> detectLanguageBatchWithResponse(
    Iterable<DetectLanguageInput> documents, TextAnalyticsRequestOptions options, Context context) {
    inputDocumentsValidation(documents);
    // Delegate to the shared sync util client that performs the actual service request.
    return client.detectLanguageUtilClient.getDetectedLanguageResponseSync(documents, options, context);
}
// Categorized Entity
/**
 * Returns a list of general categorized entities in the provided document.
 *
 * For a list of supported entity types, check: <a href="https://aka.ms/taner">this</a>
 *
 * This method uses the default language that can be set by using method
 * {@link TextAnalyticsClientBuilder#defaultLanguage(String)}. If none is specified, the service will use 'en' as
 * the language.
 *
 * <p><strong>Code Sample</strong></p>
 * <p>Recognize the entities of documents</p>
 * <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.recognizeCategorizedEntities#String -->
 * <pre>
 * CategorizedEntityCollection recognizeEntitiesResult =
 * textAnalyticsClient.recognizeEntities("Satya Nadella is the CEO of Microsoft");
 * for (CategorizedEntity entity : recognizeEntitiesResult) {
 * System.out.printf("Recognized entity: %s, entity category: %s, confidence score: %f.%n",
 * entity.getText(), entity.getCategory(), entity.getConfidenceScore());
 * }
 * </pre>
 * <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.recognizeCategorizedEntities#String -->
 *
 * @param document The document to recognize entities for.
 * For text length limits, maximum batch size, and supported text encoding, see
 * <a href="https://aka.ms/azsdk/textanalytics/data-limits">data limits</a>.
 *
 * @return A {@link CategorizedEntityCollection} contains a list of
 * {@link CategorizedEntity recognized categorized entities} and warnings.
 *
 * @throws NullPointerException if {@code document} is null.
 * @throws TextAnalyticsException if the response returned with an {@link TextAnalyticsError error}.
 */
@ServiceMethod(returns = ReturnType.SINGLE)
public CategorizedEntityCollection recognizeEntities(String document) {
    // Delegate to the two-argument overload using the builder-configured language.
    final String defaultLanguage = client.getDefaultLanguage();
    return recognizeEntities(document, defaultLanguage);
}
/**
 * Returns a list of general categorized entities in the provided document with provided language code.
 *
 * For a list of supported entity types, check: <a href="https://aka.ms/taner">this</a>
 * For a list of enabled languages, check: <a href="https://aka.ms/talangs">this</a>
 *
 * <p><strong>Code Sample</strong></p>
 * <p>Recognizes the entities in a document with a provided language code.</p>
 * <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.recognizeCategorizedEntities#String-String -->
 * <pre>
 * CategorizedEntityCollection recognizeEntitiesResult =
 * textAnalyticsClient.recognizeEntities("Satya Nadella is the CEO of Microsoft", "en");
 *
 * for (CategorizedEntity entity : recognizeEntitiesResult) {
 * System.out.printf("Recognized entity: %s, entity category: %s, confidence score: %f.%n",
 * entity.getText(), entity.getCategory(), entity.getConfidenceScore());
 * }
 * </pre>
 * <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.recognizeCategorizedEntities#String-String -->
 *
 * @param document The document to recognize entities for.
 * For text length limits, maximum batch size, and supported text encoding, see
 * <a href="https://aka.ms/azsdk/textanalytics/data-limits">data limits</a>.
 * @param language The 2 letter ISO 639-1 representation of language. If not set, uses "en" for English as default.
 *
 * @return The {@link CategorizedEntityCollection} contains a list of
 * {@link CategorizedEntity recognized categorized entities} and warnings.
 *
 * @throws NullPointerException if {@code document} is null.
 * @throws TextAnalyticsException if the response returned with an {@link TextAnalyticsError error}.
 */
@ServiceMethod(returns = ReturnType.SINGLE)
public CategorizedEntityCollection recognizeEntities(String document, String language) {
    Objects.requireNonNull(document, "'document' cannot be null.");
    // Reuse the batch API with a one-element list. Note: this is the synchronous client, so the batch call
    // returns the collection directly — the previous local name carried a misleading "Mono" suffix left over
    // from the async client.
    final RecognizeEntitiesResultCollection resultCollection =
        recognizeEntitiesBatch(Collections.singletonList(document), language, null);
    CategorizedEntityCollection entityCollection = null;
    // The for-each loop has only one entry inside; an error result is surfaced as an exception.
    for (RecognizeEntitiesResult entitiesResult : resultCollection) {
        if (entitiesResult.isError()) {
            throw LOGGER.logExceptionAsError(toTextAnalyticsException(entitiesResult.getError()));
        }
        entityCollection = new CategorizedEntityCollection(entitiesResult.getEntities(),
            entitiesResult.getEntities().getWarnings());
    }
    return entityCollection;
}
/**
 * Returns a list of general categorized entities for the provided list of documents with provided language code
 * and request options.
 *
 * <p><strong>Code Sample</strong></p>
 * <p>Recognizes the entities in a list of documents with a provided language code and request options.</p>
 * <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.recognizeCategorizedEntitiesBatch#Iterable-String-TextAnalyticsRequestOptions -->
 * <pre>
 * List<String> documents = Arrays.asList(
 * "I had a wonderful trip to Seattle last week.",
 * "I work at Microsoft.");
 *
 * RecognizeEntitiesResultCollection resultCollection =
 * textAnalyticsClient.recognizeEntitiesBatch(documents, "en", null);
 *
 * // Batch statistics
 * TextDocumentBatchStatistics batchStatistics = resultCollection.getStatistics();
 * System.out.printf(
 * "A batch of documents statistics, transaction count: %s, valid document count: %s.%n",
 * batchStatistics.getTransactionCount(), batchStatistics.getValidDocumentCount());
 *
 * resultCollection.forEach(recognizeEntitiesResult ->
 * recognizeEntitiesResult.getEntities().forEach(entity ->
 * System.out.printf("Recognized entity: %s, entity category: %s, confidence score: %f.%n",
 * entity.getText(), entity.getCategory(), entity.getConfidenceScore())));
 * </pre>
 * <!-- end com.azure.ai.textanalytics.TextAnalyticsClient.recognizeCategorizedEntitiesBatch#Iterable-String-TextAnalyticsRequestOptions -->
 *
 * @param documents A list of documents to recognize entities for.
 * For text length limits, maximum batch size, and supported text encoding, see
 * <a href="https://aka.ms/azsdk/textanalytics/data-limits">data limits</a>.
 * @param language The 2 letter ISO 639-1 representation of language. If not set, uses "en" for English as default.
 * @param options The {@link TextAnalyticsRequestOptions options} to configure the scoring model for documents
 * and show statistics.
 *
 * @return A {@link RecognizeEntitiesResultCollection}.
 *
 * @throws NullPointerException if {@code documents} is null.
 * @throws IllegalArgumentException if {@code documents} is empty.
 * @throws UnsupportedOperationException if {@link TextAnalyticsRequestOptions#isServiceLogsDisabled()} is true in
 * service API version {@link TextAnalyticsServiceVersion#V3_0}. {@code disableServiceLogs} is only available for
 * API version v3.1 and newer.
 */
@ServiceMethod(returns = ReturnType.SINGLE)
public RecognizeEntitiesResultCollection recognizeEntitiesBatch(
    Iterable<String> documents, String language, TextAnalyticsRequestOptions options) {
    inputDocumentsValidation(documents);
    // Assign each plain-string document a positional ID and the shared language via the fluent setter.
    final Iterable<TextDocumentInput> inputs = mapByIndex(documents,
        (id, text) -> new TextDocumentInput(id, text).setLanguage(language));
    return recognizeEntitiesBatchWithResponse(inputs, options, Context.NONE).getValue();
}
/**
* Returns a list of general categorized entities for the provided list of {@link TextDocumentInput document} with
* provided request options.
*
* <p><strong>Code Sample</strong></p>
* <p>Recognizes the entities with http response in a list of {@link TextDocumentInput document} with provided
* request options.</p>
* <!-- src_embed com.azure.ai.textanalytics.TextAnalyticsClient.recognizeEntitiesBatch#Iterable-TextAnalyticsRequestOptions-Context -->
* <pre>
* List<TextDocumentInput> textDocumentInputs = Arrays.asList(
* new TextDocumentInput("0", "I had a wonderful trip to Seattle last week.").setLanguage("en"),
* new TextDocumentInput("1", "I work at Microsoft.").setLanguage("en")
* );