Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TA] Add PII back #14794

Merged
merged 7 commits into from
Sep 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/textanalytics/Azure.AI.TextAnalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### New Features
- It defaults to the latest supported API version, which currently is `3.1-preview.2`.
- `ErrorCode` value returned from the service is now surfaced in `RequestFailedException`.
- Added the `RecognizePiiEntities` endpoint which returns entities containing Personally Identifiable Information. This feature is available in the Text Analytics service v3.1-preview.1 and above.
- Support added for Opinion Mining. This feature is available in the Text Analytics service v3.1-preview.1 and above.
- Added `Offset` and `Length` properties for `CategorizedEntity`, `SentenceSentiment`, and `LinkedEntityMatch`. The default encoding is UTF-16 code units. For additional information see https://aka.ms/text-analytics-offsets
- `TextAnalyticsError` and `TextAnalyticsWarning` now are marked as immutable.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,28 @@ public partial class LinkedEntityCollection : System.Collections.ObjectModel.Rea
public Azure.AI.TextAnalytics.TextSentiment Sentiment { get { throw null; } }
public string Text { get { throw null; } }
}
[System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]
public readonly partial struct PiiEntity
{
private readonly object _dummy;
private readonly int _dummyPrimitive;
public Azure.AI.TextAnalytics.EntityCategory Category { get { throw null; } }
public double ConfidenceScore { get { throw null; } }
public int Length { get { throw null; } }
public int Offset { get { throw null; } }
public string SubCategory { get { throw null; } }
public string Text { get { throw null; } }
}
public partial class PiiEntityCollection : System.Collections.ObjectModel.ReadOnlyCollection<Azure.AI.TextAnalytics.PiiEntity>
{
internal PiiEntityCollection() : base (default(System.Collections.Generic.IList<Azure.AI.TextAnalytics.PiiEntity>)) { }
public string RedactedText { get { throw null; } }
public System.Collections.Generic.IReadOnlyCollection<Azure.AI.TextAnalytics.TextAnalyticsWarning> Warnings { get { throw null; } }
}
public enum PiiEntityDomainType
{
ProtectedHealthInformation = 0,
}
public partial class RecognizeEntitiesResult : Azure.AI.TextAnalytics.TextAnalyticsResult
{
internal RecognizeEntitiesResult() { }
Expand All @@ -194,6 +216,22 @@ public partial class RecognizeLinkedEntitiesResultCollection : System.Collection
public string ModelVersion { get { throw null; } }
public Azure.AI.TextAnalytics.TextDocumentBatchStatistics Statistics { get { throw null; } }
}
public partial class RecognizePiiEntitiesOptions : Azure.AI.TextAnalytics.TextAnalyticsRequestOptions
{
public RecognizePiiEntitiesOptions() { }
public Azure.AI.TextAnalytics.PiiEntityDomainType DomainFilter { get { throw null; } set { } }
}
public partial class RecognizePiiEntitiesResult : Azure.AI.TextAnalytics.TextAnalyticsResult
{
internal RecognizePiiEntitiesResult() { }
public Azure.AI.TextAnalytics.PiiEntityCollection Entities { get { throw null; } }
}
public partial class RecognizePiiEntitiesResultCollection : System.Collections.ObjectModel.ReadOnlyCollection<Azure.AI.TextAnalytics.RecognizePiiEntitiesResult>
{
internal RecognizePiiEntitiesResultCollection() : base (default(System.Collections.Generic.IList<Azure.AI.TextAnalytics.RecognizePiiEntitiesResult>)) { }
public string ModelVersion { get { throw null; } }
public Azure.AI.TextAnalytics.TextDocumentBatchStatistics Statistics { get { throw null; } }
}
[System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]
public readonly partial struct SentenceSentiment
{
Expand Down Expand Up @@ -266,6 +304,12 @@ public partial class TextAnalyticsClient
public virtual Azure.Response<Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResultCollection> RecognizeLinkedEntitiesBatch(System.Collections.Generic.IEnumerable<string> documents, string language = null, Azure.AI.TextAnalytics.TextAnalyticsRequestOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
public virtual System.Threading.Tasks.Task<Azure.Response<Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResultCollection>> RecognizeLinkedEntitiesBatchAsync(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.TextDocumentInput> documents, Azure.AI.TextAnalytics.TextAnalyticsRequestOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
public virtual System.Threading.Tasks.Task<Azure.Response<Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResultCollection>> RecognizeLinkedEntitiesBatchAsync(System.Collections.Generic.IEnumerable<string> documents, string language = null, Azure.AI.TextAnalytics.TextAnalyticsRequestOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
public virtual Azure.Response<Azure.AI.TextAnalytics.PiiEntityCollection> RecognizePiiEntities(string document, string language = null, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
public virtual System.Threading.Tasks.Task<Azure.Response<Azure.AI.TextAnalytics.PiiEntityCollection>> RecognizePiiEntitiesAsync(string document, string language = null, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
public virtual Azure.Response<Azure.AI.TextAnalytics.RecognizePiiEntitiesResultCollection> RecognizePiiEntitiesBatch(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.TextDocumentInput> documents, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
public virtual Azure.Response<Azure.AI.TextAnalytics.RecognizePiiEntitiesResultCollection> RecognizePiiEntitiesBatch(System.Collections.Generic.IEnumerable<string> documents, string language = null, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
public virtual System.Threading.Tasks.Task<Azure.Response<Azure.AI.TextAnalytics.RecognizePiiEntitiesResultCollection>> RecognizePiiEntitiesBatchAsync(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.TextDocumentInput> documents, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
public virtual System.Threading.Tasks.Task<Azure.Response<Azure.AI.TextAnalytics.RecognizePiiEntitiesResultCollection>> RecognizePiiEntitiesBatchAsync(System.Collections.Generic.IEnumerable<string> documents, string language = null, Azure.AI.TextAnalytics.RecognizePiiEntitiesOptions options = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
[System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
public override string ToString() { throw null; }
}
Expand Down Expand Up @@ -350,12 +394,17 @@ public static partial class TextAnalyticsModelFactory
public static Azure.AI.TextAnalytics.LinkedEntityMatch LinkedEntityMatch(string text, double score, int offset, int length) { throw null; }
public static Azure.AI.TextAnalytics.MinedOpinion MinedOpinion(Azure.AI.TextAnalytics.AspectSentiment aspect, System.Collections.Generic.IReadOnlyList<Azure.AI.TextAnalytics.OpinionSentiment> opinions) { throw null; }
public static Azure.AI.TextAnalytics.OpinionSentiment OpinionSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, double positiveScore, double negativeScore, string text, bool isNegated, int offset, int length) { throw null; }
public static Azure.AI.TextAnalytics.PiiEntity PiiEntity(string text, string category, string subCategory, double score, int offset, int length) { throw null; }
public static Azure.AI.TextAnalytics.PiiEntityCollection PiiEntityCollection(System.Collections.Generic.IList<Azure.AI.TextAnalytics.PiiEntity> entities, string redactedText, System.Collections.Generic.IList<Azure.AI.TextAnalytics.TextAnalyticsWarning> warnings = null) { throw null; }
public static Azure.AI.TextAnalytics.RecognizeEntitiesResult RecognizeEntitiesResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; }
public static Azure.AI.TextAnalytics.RecognizeEntitiesResult RecognizeEntitiesResult(string id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.CategorizedEntityCollection entities) { throw null; }
public static Azure.AI.TextAnalytics.RecognizeEntitiesResultCollection RecognizeEntitiesResultCollection(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.RecognizeEntitiesResult> list, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string modelVersion) { throw null; }
public static Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResult RecognizeLinkedEntitiesResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; }
public static Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResult RecognizeLinkedEntitiesResult(string id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.LinkedEntityCollection linkedEntities) { throw null; }
public static Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResultCollection RecognizeLinkedEntitiesResultCollection(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResult> list, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string modelVersion) { throw null; }
public static Azure.AI.TextAnalytics.RecognizePiiEntitiesResult RecognizePiiEntitiesResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; }
public static Azure.AI.TextAnalytics.RecognizePiiEntitiesResult RecognizePiiEntitiesResult(string id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.PiiEntityCollection entities) { throw null; }
public static Azure.AI.TextAnalytics.RecognizePiiEntitiesResultCollection RecognizePiiEntitiesResultCollection(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.RecognizePiiEntitiesResult> list, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string modelVersion) { throw null; }
[System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
public static Azure.AI.TextAnalytics.SentenceSentiment SentenceSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, string text, double positiveScore, double neutralScore, double negativeScore) { throw null; }
public static Azure.AI.TextAnalytics.SentenceSentiment SentenceSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, string text, double positiveScore, double neutralScore, double negativeScore, int offset, int length, System.Collections.Generic.IReadOnlyList<Azure.AI.TextAnalytics.MinedOpinion> minedOpinions) { throw null; }
Expand Down
64 changes: 64 additions & 0 deletions sdk/textanalytics/Azure.AI.TextAnalytics/src/PiiEntity.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using Azure.AI.TextAnalytics.Models;

namespace Azure.AI.TextAnalytics
{
/// <summary>
/// A word or phrase identified as a Personally Identifiable Information
/// that can be categorized as known type in a given taxonomy.
/// The set of categories recognized by the Text Analytics service is described at
/// <a href="https://aka.ms/tanerpii"/>.
/// </summary>
public readonly struct PiiEntity
{
internal PiiEntity(Entity entity)
{
Category = entity.Category;
Text = entity.Text;
SubCategory = entity.Subcategory;
maririos marked this conversation as resolved.
Show resolved Hide resolved
ConfidenceScore = entity.ConfidenceScore;
Offset = entity.Offset;
Length = entity.Length;
}

/// <summary>
/// Gets the entity text as it appears in the input document.
/// </summary>
public string Text { get; }

/// <summary>
/// Gets the entity category inferred by the Text Analytics service's
/// named entity recognition model, such as Financial Account
/// Identification/Social Security Number/Phone Number, etc.
/// The list of available categories is described at
/// <a href="https://aka.ms/tanerpii"/>.
/// </summary>
public EntityCategory Category { get; }

/// <summary>
/// Gets the sub category of the entity inferred by the Text Analytics service's
/// named entity recognition model. This property may not have a value if
/// a sub category doesn't exist for this entity. The list of available categories and
/// subcategories is described at <a href="https://aka.ms/tanerpii"/>.
/// </summary>
public string SubCategory { get; }

/// <summary>
/// Gets a score between 0 and 1, indicating the confidence that the
/// text substring matches this inferred entity.
/// </summary>
public double ConfidenceScore { get; }

/// <summary>
/// Gets the starting position (in UTF-16 code units) for the matching text in the input document.
/// </summary>
public int Offset { get; }

/// <summary>
/// Gets the length (in UTF-16 code units) of the matching text in the input document.
/// </summary>
public int Length { get; }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.ObjectModel;

namespace Azure.AI.TextAnalytics
{
/// <summary>
/// Collection of <see cref="PiiEntity"/> objects in a document.
/// </summary>
public class PiiEntityCollection : ReadOnlyCollection<PiiEntity>
{
internal PiiEntityCollection(IList<PiiEntity> entities, string redactedText, IList<TextAnalyticsWarning> warnings)
: base(entities)
{
RedactedText = redactedText;
Warnings = new ReadOnlyCollection<TextAnalyticsWarning>(warnings);
}

/// <summary>
/// Gets the text of the input document with all of the Personally Identifiable Information
/// redacted out.
/// </summary>
public string RedactedText { get; }

/// <summary>
/// Warnings encountered while processing the document.
/// </summary>
public IReadOnlyCollection<TextAnalyticsWarning> Warnings { get; }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

namespace Azure.AI.TextAnalytics
{
/// <summary>
/// The different domains of PII entities that users can filter requests by.
/// </summary>
public enum PiiEntityDomainType
{
/// <summary>
/// Protected Health Information entities.
/// For more information see <a href="https://aka.ms/tanerpii"/>.
/// </summary>
ProtectedHealthInformation
}

[System.Diagnostics.CodeAnalysis.SuppressMessage("StyleCop.CSharp.DocumentationRules", "SA1649:File name should match first type name", Justification = "Small extensions, good to keep here.")]
internal static class PiiEntityDomainTypeExtensions
{
internal static string GetString(this PiiEntityDomainType type)
{
return type switch
{
PiiEntityDomainType.ProtectedHealthInformation => "PHI",
_ => null,
};
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

namespace Azure.AI.TextAnalytics
{
/// <summary>
/// Options that allow callers to specify details about how the operation
/// is run and what information is returned from it by the service.
/// </summary>
public class RecognizePiiEntitiesOptions : TextAnalyticsRequestOptions
{
/// <summary>
/// Initializes a new instance of the <see cref="RecognizePiiEntitiesOptions"/>
/// class.
/// </summary>
public RecognizePiiEntitiesOptions()
{
}

/// <summary>
/// Filters the response entities to ones only included in the specified domain.
/// For more information see <a href="https://aka.ms/tanerpii"/>.
/// </summary>
public PiiEntityDomainType DomainFilter { get; set; }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;

namespace Azure.AI.TextAnalytics
{
/// <summary>
/// The result of the recognize PII entities operation on a
/// document, containing a collection of the <see cref="PiiEntity"/>
/// objects containing Personally Identifiable Information that were
/// found in that document.
/// </summary>
public class RecognizePiiEntitiesResult : TextAnalyticsResult
{
private readonly PiiEntityCollection _entities;

internal RecognizePiiEntitiesResult(string id, TextDocumentStatistics statistics, PiiEntityCollection entities)
: base(id, statistics)
{
_entities = entities;
}

internal RecognizePiiEntitiesResult(string id, TextAnalyticsError error) : base(id, error) { }

/// <summary>
/// Gets the collection of PII entities containing Personally
/// Identifiable Information in the document.
/// </summary>
public PiiEntityCollection Entities
{
get
{
if (HasError)
{
#pragma warning disable CA1065 // Do not raise exceptions in unexpected locations
throw new InvalidOperationException($"Cannot access result for document {Id}, due to error {Error.ErrorCode}: {Error.Message}");
#pragma warning restore CA1065 // Do not raise exceptions in unexpected locations
}
return _entities;
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.ObjectModel;

namespace Azure.AI.TextAnalytics
{
/// <summary>
/// Collection of <see cref="RecognizePiiEntitiesResult"/> objects corresponding
/// to a batch of documents, and information about the batch operation.
/// </summary>
public class RecognizePiiEntitiesResultCollection : ReadOnlyCollection<RecognizePiiEntitiesResult>
{
internal RecognizePiiEntitiesResultCollection(IList<RecognizePiiEntitiesResult> list, TextDocumentBatchStatistics statistics, string modelVersion) : base(list)
{
Statistics = statistics;
ModelVersion = modelVersion;
}

/// <summary>
/// Gets statistics about the documents and how it was processed
/// by the service. This property will have a value when IncludeStatistics
/// is set to true in the client call.
/// </summary>
public TextDocumentBatchStatistics Statistics { get; }

/// <summary>
/// Gets the version of the Text Analytics model used by this operation
/// on this batch of documents.
/// </summary>
public string ModelVersion { get; }
}
}