Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[text analytics] add bing_id property to LinkedEntity class #13446

Merged
merged 8 commits into from
Sep 1, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ pass in `v3.0` to the kwarg `api_version` when creating your TextAnalyticsClient
- `offset` is the offset of the text from the start of the document
- We now have added support for opinion mining. To use this feature, you need to make sure you are using the service's
v3.1-preview.1 API. To get this support pass `show_opinion_mining` as True when calling the `analyze_sentiment` endpoint
- Add property `bing_id` to the `LinkedEntity` class. This property is only available for v3.1-preview.2 and up, and it is to be
used in conjunction with the Bing Entity Search API to fetch additional relevant information about the returned entity.

## 5.0.0 (2020-07-27)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ class TextAnalyticsApiVersion(str, Enum):

#: this is the default version
V3_1_PREVIEW_1 = "v3.1-preview.1"

# 3.1-preview.2 is not yet the default version since we don't have a
# reliable endpoint
V3_1_PREVIEW_2 = "v3.1-preview.2"
V3_0 = "v3.0"

def _authentication_policy(credential):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# coding=utf-8
# coding=utf-8 pylint: disable=too-many-lines
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
Expand Down Expand Up @@ -611,6 +611,11 @@ class LinkedEntity(DictMixin):
:ivar data_source: Data source used to extract entity linking,
such as Wiki/Bing etc.
:vartype data_source: str
:ivar str bing_id: Bing unique identifier of the recognized entity. Use in conjunction
with the Bing Entity Search SDK to fetch additional relevant information. Only
available for API version v3.1-preview.2 and up.
.. versionadded:: v3.1-preview.2
The *bing_id* property.
"""

def __init__(self, **kwargs):
Expand All @@ -620,22 +625,32 @@ def __init__(self, **kwargs):
self.data_source_entity_id = kwargs.get("data_source_entity_id", None)
self.url = kwargs.get("url", None)
self.data_source = kwargs.get("data_source", None)
self.bing_id = kwargs.get("bing_id", None)

@classmethod
def _from_generated(cls, entity):
bing_id = entity.bing_id if hasattr(entity, "bing_id") else None
return cls(
name=entity.name,
matches=[LinkedEntityMatch._from_generated(e) for e in entity.matches], # pylint: disable=protected-access
language=entity.language,
data_source_entity_id=entity.id,
url=entity.url,
data_source=entity.data_source,
bing_id=bing_id,
)

def __repr__(self):
return "LinkedEntity(name={}, matches={}, language={}, data_source_entity_id={}, url={}, " \
"data_source={})".format(self.name, repr(self.matches), self.language, self.data_source_entity_id,
self.url, self.data_source)[:1024]
"data_source={}, bing_id={})".format(
self.name,
repr(self.matches),
self.language,
self.data_source_entity_id,
self.url,
self.data_source,
self.bing_id,
)[:1024]


class LinkedEntityMatch(DictMixin):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "Microsoft was founded by Bill Gates
and Paul Allen", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '108'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/linking?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"documents":[{"id":"0","entities":[{"bingId":"0d47c987-0042-5576-15e8-97af601614fa","name":"Bill
Gates","matches":[{"text":"Bill Gates","offset":25,"length":10,"confidenceScore":0.52}],"language":"en","id":"Bill
Gates","url":"https://en.wikipedia.org/wiki/Bill_Gates","dataSource":"Wikipedia"},{"bingId":"df2c4376-9923-6a54-893f-2ee5a5badbc7","name":"Paul
Allen","matches":[{"text":"Paul Allen","offset":40,"length":10,"confidenceScore":0.54}],"language":"en","id":"Paul
Allen","url":"https://en.wikipedia.org/wiki/Paul_Allen","dataSource":"Wikipedia"},{"bingId":"a093e9b9-90f5-a3d5-c4b8-5855e1b01f85","name":"Microsoft","matches":[{"text":"Microsoft","offset":0,"length":9,"confidenceScore":0.49}],"language":"en","id":"Microsoft","url":"https://en.wikipedia.org/wiki/Microsoft","dataSource":"Wikipedia"}],"warnings":[]}],"errors":[],"modelVersion":"2020-02-01"}'
headers:
apim-request-id:
- 34b34e81-fcc2-4c1e-85b2-116f85196a4c
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=1
date:
- Mon, 31 Aug 2020 18:48:40 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '27'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "Microsoft was founded by Bill Gates
and Paul Allen", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Content-Length:
- '108'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/linking?showStats=false&stringIndexType=UnicodeCodePoint
response:
body:
string: '{"documents":[{"id":"0","entities":[{"bingId":"0d47c987-0042-5576-15e8-97af601614fa","name":"Bill
Gates","matches":[{"text":"Bill Gates","offset":25,"length":10,"confidenceScore":0.52}],"language":"en","id":"Bill
Gates","url":"https://en.wikipedia.org/wiki/Bill_Gates","dataSource":"Wikipedia"},{"bingId":"df2c4376-9923-6a54-893f-2ee5a5badbc7","name":"Paul
Allen","matches":[{"text":"Paul Allen","offset":40,"length":10,"confidenceScore":0.54}],"language":"en","id":"Paul
Allen","url":"https://en.wikipedia.org/wiki/Paul_Allen","dataSource":"Wikipedia"},{"bingId":"a093e9b9-90f5-a3d5-c4b8-5855e1b01f85","name":"Microsoft","matches":[{"text":"Microsoft","offset":0,"length":9,"confidenceScore":0.49}],"language":"en","id":"Microsoft","url":"https://en.wikipedia.org/wiki/Microsoft","dataSource":"Wikipedia"}],"warnings":[]}],"errors":[],"modelVersion":"2020-02-01"}'
headers:
apim-request-id: 70ab796e-3da1-4a55-86b4-16c4b19a97a8
content-type: application/json; charset=utf-8
csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1
date: Mon, 31 Aug 2020 18:48:41 GMT
strict-transport-security: max-age=31536000; includeSubDomains; preload
transfer-encoding: chunked
x-content-type-options: nosniff
x-envoy-upstream-service-time: '26'
status:
code: 200
message: OK
url: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/linking?showStats=false&stringIndexType=UnicodeCodePoint
version: 1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

import os
import pytest
import platform
import functools
Expand Down Expand Up @@ -586,3 +586,17 @@ def test_string_index_type_not_fail_v3(self, client):
# make sure that the addition of the string_index_type kwarg for v3.1-preview.1 doesn't
# cause v3.0 calls to fail
client.recognize_linked_entities(["please don't fail"])

# currently only have this as playback since the dev endpoint is unreliable
@pytest.mark.playback_test_only
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we open an issue to remove playback marker when endpoint works reliably? (sorry for nit, I feel like these kinds of things are too easy to forget about)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

np, good point here you go bb

@GlobalTextAnalyticsAccountPreparer()
@TextAnalyticsClientPreparer(client_kwargs={
"api_version": TextAnalyticsApiVersion.V3_1_PREVIEW_2,
"text_analytics_account_key": os.environ.get('AZURE_TEXT_ANALYTICS_KEY'),
"text_analytics_account": "https://cognitiveusw2dev.azure-api.net/"
})
def test_bing_id(self, client):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are u testing here? not sure I understand the purpose of the test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this one is just to check that I've correctly set the bing ID I get back from the service. I didn't tie it to the actual bing ID returned from the service though, I'm not sure how arbitrary that number is

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do u know if the bingId property will always be populated?
worth adding a test checking that the property has content on it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be populated by the server for v3.1-preview.2 and up (it's not shown as an optional parameter). I'm confused by "worth adding a test checking that the property has content on it", I believe that's what I'm checking in this test

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be me and not knowing ton of Python, but assert entity doesn't seem to be checking that bing_id has something on it. like it is not None or null, or empty

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assert checks that it's not None and not an empty string

result = client.recognize_linked_entities(["Microsoft was founded by Bill Gates and Paul Allen"])
for doc in result:
for entity in doc.entities:
assert entity # this checks if it's None and if it's empty
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

import os
import pytest
import platform
import functools
Expand Down Expand Up @@ -622,3 +622,17 @@ async def test_string_index_type_not_fail_v3(self, client):
# make sure that the addition of the string_index_type kwarg for v3.1-preview.1 doesn't
# cause v3.0 calls to fail
await client.recognize_linked_entities(["please don't fail"])

# currently only have this as playback since the dev endpoint is unreliable
@pytest.mark.playback_test_only
@GlobalTextAnalyticsAccountPreparer()
@TextAnalyticsClientPreparer(client_kwargs={
"api_version": TextAnalyticsApiVersion.V3_1_PREVIEW_2,
"text_analytics_account_key": os.environ.get('AZURE_TEXT_ANALYTICS_KEY'),
"text_analytics_account": "https://cognitiveusw2dev.azure-api.net/"
})
async def test_bing_id(self, client):
result = await client.recognize_linked_entities(["Microsoft was founded by Bill Gates and Paul Allen"])
for doc in result:
for entity in doc.entities:
assert entity # this checks if it's None and if it's empty
7 changes: 5 additions & 2 deletions sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,15 @@ def linked_entity(linked_entity_match):
language="English",
data_source_entity_id="Bill Gates",
url="https://en.wikipedia.org/wiki/Bill_Gates",
data_source="wikipedia"
data_source="wikipedia",
bing_id="12345678"
)
model_repr = (
"LinkedEntity(name=Bill Gates, matches=[{}, {}], "\
"language=English, data_source_entity_id=Bill Gates, "\
"url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia)".format(linked_entity_match[1], linked_entity_match[1])
"url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia, bing_id=12345678)".format(
linked_entity_match[1], linked_entity_match[1]
)
)
assert repr(model) == model_repr
return model, model_repr
Expand Down