In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports all loaded modules
# before each cell runs, so edits to text_analytics take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# required imports
import html
import json
import os
from pprint import pprint

import requests
from IPython.display import HTML

import text_analytics

In [3]:
from text_analytics import TextService

In [25]:
# The subscription key is a secret and must not be committed with the notebook.
# Read it from the environment instead:
#   TEXT_ANALYTICS_KEY    - required; a KeyError is raised if it is not set
#   TEXT_ANALYTICS_REGION - optional; defaults to "westeurope"
# Any key that was ever saved inside a shared notebook should be rotated.
TEXT_ANALYTICS_REGION = os.environ.get("TEXT_ANALYTICS_REGION", "westeurope")
TEXT_ANALYTICS_KEY = os.environ["TEXT_ANALYTICS_KEY"]

service = TextService(region=TEXT_ANALYTICS_REGION, key=TEXT_ANALYTICS_KEY)

## Detect Language

In [7]:
# Sample inputs for language detection: three natural-language sentences and
# one string made only of emoticons.
text_list = [
    "Hello world",
    "Bonjour tout le monde",
    "La carretera estaba atascada. Había mucho tráfico el día de ayer.",
    ":) :( :D",
]

In [43]:
# Wrap the raw strings in the request payload used by the service calls:
# {'documents': [{'id': ..., 'text': ...}, ...]}. In the output below the ids
# are the list positions rendered as strings.
documents = service.BuildDocumentList(text_list)
pprint(documents)

{'documents': [{'id': '0', 'text': 'Hello world'},
               {'id': '1', 'text': 'Bonjour tout le monde'},
               {'id': '2',
                'text': 'La carretera estaba atascada. Había mucho tráfico el '
                        'día de ayer.'},
               {'id': '3', 'text': ':) :( :D'}]}


In [41]:
# Run language detection on the payload. The response (see output below) has
# one 'detectedLanguages' list per document id, plus a top-level 'errors' list.
languages = service.DetectLanguage(documents)
pprint(languages)

{'documents': [{'detectedLanguages': [{'iso6391Name': 'en',
                                       'name': 'English',
                                       'score': 1.0}],
                'id': '0'},
               {'detectedLanguages': [{'iso6391Name': 'fr',
                                       'name': 'French',
                                       'score': 1.0}],
                'id': '1'},
               {'detectedLanguages': [{'iso6391Name': 'es',
                                       'name': 'Spanish',
                                       'score': 1.0}],
                'id': '2'},
               {'detectedLanguages': [{'iso6391Name': 'en',
                                       'name': 'English',
                                       'score': 1.0}],
                'id': '3'}],
 'errors': []}


In [42]:
# Render each input text next to its detected language(s) as an HTML table.
# `html` and `HTML` are imported in the notebook's import cell.

# Index the request texts by document id once, instead of scanning the request
# list again for every response row.
text_by_id = {doc["id"]: doc["text"] for doc in documents["documents"]}

table = []
for document in languages["documents"]:
    langs = ", ".join(
        "{0}({1})".format(lang["name"], lang["score"])
        for lang in document["detectedLanguages"]
    )
    # Escape both cells so input text containing <, > or & is shown literally
    # rather than parsed as markup, and close every row with </tr>.
    table.append(
        "<tr><td>{0}</td><td>{1}</td></tr>".format(
            html.escape(text_by_id[document["id"]]), html.escape(langs)
        )
    )

HTML(
    "<table><tr><th>Text</th><th>Detected languages(scores)</th></tr>{0}</table>".format(
        "\n".join(table)
    )
)

Text,Detected languages(scores)
Hello world,English(1.0)
Bonjour tout le monde,French(1.0)
La carretera estaba atascada. Había mucho tráfico el día de ayer.,Spanish(1.0)
:) :( :D,English(1.0)


## Analyze Sentiment

In [44]:
# Request payload for sentiment analysis and key-phrase extraction: two English
# and two Spanish documents, each tagged with its language code.
documents = {
    "documents": [
        dict(id="1", language="en", text="I had a wonderful experience! The rooms were wonderful and the staff was helpful."),
        dict(id="2", language="en", text="I had a terrible time at the hotel. The staff was rude and the food was awful."),
        dict(id="3", language="es", text="Los caminos que llevan hasta Monte Rainier son espectaculares y hermosos."),
        dict(id="4", language="es", text="La carretera estaba atascada. Había mucho tráfico el día de ayer."),
    ]
}

In [45]:
# Score the sentiment of each document. The response (see output below) carries
# one numeric 'score' per document id, plus a top-level 'errors' list.
# NOTE(review): the sample output suggests scores near 1 are positive and
# scores near 0 are negative -- confirm against the service documentation.
sentiment = service.AnalyzeSentiment(documents)
pprint(sentiment)

{'documents': [{'id': '1', 'score': 0.9708490371704102},
               {'id': '2', 'score': 0.0019068121910095215},
               {'id': '3', 'score': 0.7456425428390503},
               {'id': '4', 'score': 0.334433376789093}],
 'errors': []}


In [51]:
# Render each input text next to its sentiment score as an HTML table.
# `html` and `HTML` are imported in the notebook's import cell.

# Index the request texts by document id once, instead of scanning the request
# list again for every response row.
text_by_id = {doc["id"]: doc["text"] for doc in documents["documents"]}

table = []
for document in sentiment["documents"]:
    # Escape the text so <, > or & in the input is shown literally, and close
    # every row with </tr>.
    table.append(
        "<tr><td>{0}</td><td>{1}</td></tr>".format(
            html.escape(text_by_id[document["id"]]), document["score"]
        )
    )

# The second column holds sentiment scores, so it is labelled accordingly.
HTML(
    "<table><tr><th>Text</th><th>Sentiment score</th></tr>{0}</table>".format(
        "\n".join(table)
    )
)

Text,Detected languages(scores)
I had a wonderful experience! The rooms were wonderful and the staff was helpful.,0.9708490371704102
I had a terrible time at the hotel. The staff was rude and the food was awful.,0.0019068121910095
Los caminos que llevan hasta Monte Rainier son espectaculares y hermosos.,0.7456425428390503
La carretera estaba atascada. Había mucho tráfico el día de ayer.,0.334433376789093


## Key Phrases

In [53]:
# Extract key phrases from the `documents` payload currently in scope. The
# response (see output below) has one 'keyPhrases' list of strings per document
# id, plus a top-level 'errors' list.
key_phrases = service.ExtractKeyPhrases(documents)
pprint(key_phrases)

{'documents': [{'id': '1',
                'keyPhrases': ['wonderful experience', 'staff', 'rooms']},
               {'id': '2',
                'keyPhrases': ['food', 'terrible time', 'hotel', 'staff']},
               {'id': '3', 'keyPhrases': ['Monte Rainier', 'caminos']},
               {'id': '4', 'keyPhrases': ['carretera', 'tráfico', 'día']}],
 'errors': []}


In [54]:
# Render each input text next to its extracted key phrases as an HTML table.
# `html` and `HTML` are imported in the notebook's import cell.

# Index the request texts by document id once, instead of scanning the request
# list again for every response row.
text_by_id = {doc["id"]: doc["text"] for doc in documents["documents"]}

table = []
for document in key_phrases["documents"]:
    phrases = ",".join(document["keyPhrases"])
    # Escape both cells so <, > or & in the text or phrases is shown literally,
    # and close every row with </tr>.
    table.append(
        "<tr><td>{0}</td><td>{1}</td></tr>".format(
            html.escape(text_by_id[document["id"]]), html.escape(phrases)
        )
    )

HTML(
    "<table><tr><th>Text</th><th>Key phrases</th></tr>{0}</table>".format(
        "\n".join(table)
    )
)

Text,Key phrases
I had a wonderful experience! The rooms were wonderful and the staff was helpful.,"wonderful experience,staff,rooms"
I had a terrible time at the hotel. The staff was rude and the food was awful.,"food,terrible time,hotel,staff"
Los caminos que llevan hasta Monte Rainier son espectaculares y hermosos.,"Monte Rainier,caminos"
La carretera estaba atascada. Había mucho tráfico el día de ayer.,"carretera,tráfico,día"


## Identify Entities

In [55]:
# Request payload for entity recognition: two English sentences containing a
# person name, quantities, percentages, dates and a place name.
documents = {
    "documents": [
        dict(id="1", text="Jeff bought three dozen eggs because there was a 50% discount."),
        dict(id="2", text="The Great Depression began in 1929. By 1933, the GDP in America fell by 25%."),
    ]
}

In [56]:
# Run entity recognition on the payload. In the output below each document gets
# an 'entities' list whose items carry a 'name', a 'type' and the 'matches'
# (text, offset, length) found in the input; some also carry Wikipedia/Bing links.
entities = service.IdentityEntities(documents)
pprint(entities)

{'documents': [{'entities': [{'bingId': '4764c74d-ece6-1778-5d5b-1adb255130c2',
                              'matches': [{'length': 4,
                                           'offset': 0,
                                           'text': 'Jeff'}],
                              'name': 'Jeffster!',
                              'type': 'Person',
                              'wikipediaId': 'Jeffster!',
                              'wikipediaLanguage': 'en',
                              'wikipediaUrl': 'https://en.wikipedia.org/wiki/Jeffster!'},
                             {'matches': [{'length': 11,
                                           'offset': 12,
                                           'text': 'three dozen'}],
                              'name': 'three dozen',
                              'subType': 'Number',
                              'type': 'Quantity'},
                             {'bingId': 'f0ae496f-3c3b-21c7-0946-0d05ffe6e06e',
                          