In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Using the Natural Language API with Python

This is a quick port to Colab for the tutorial [Using the Natural Language API with Python](https://codelabs.developers.google.com/codelabs/cloud-natural-language-python3).

<table align="left">
<tr>
<td><a href="https://colab.research.google.com/github/PicardParis/cloud-snippets/blob/main/python/colab/Using the Natural Language API with Python.ipynb">
<img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo" align="center"> Run in Colab
</a></td>
<td><a href="https://github.com/PicardParis/cloud-snippets/blob/main/python/colab/Using the Natural Language API with Python.ipynb">
<img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo" align="center"> View on GitHub
</a></td>
</tr>
</table>

## Setup

In [None]:
# Is the notebook running in Colab?
import sys

# The check looks for the `google.colab` module among the already-imported modules
assert "google.colab" in sys.modules, "Notebook not tested outside of Colab"

# Plain string: there is nothing to interpolate, so no f-prefix is needed
print("✔️ Running in Colab")

In [None]:
# Is the client library installed? (will restart if necessary)
try:
    from importlib.metadata import version, PackageNotFoundError
except ModuleNotFoundError:
    from importlib_metadata import version, PackageNotFoundError

PACKAGE = "google.cloud.language"
MINIMUM_MAJOR = 2

install = False
try:
    package_version = version(PACKAGE)
    major = int(package_version.split(".")[0])
    if major < MINIMUM_MAJOR:
        install = True
        print(f"{PACKAGE} is going to be updated...")
except PackageNotFoundError:
    install = True
    print(f"{PACKAGE} is going to be installed...")

if install:
    # Install and restart
    %pip install --upgrade $PACKAGE
    get_ipython().kernel.do_shutdown(True)
    raise RuntimeWarning("🔄 Restarting... Run the cell again. You can ignore the warnings.")

print(f"✔️ {PACKAGE}=={package_version}")

if MINIMUM_MAJOR < major:
    print(f"\nNote: This notebook has only been tested with versions {MINIMUM_MAJOR}.*")

Set up your Google Cloud project
- If needed, [create a new Google Cloud project](https://console.cloud.google.com/cloud-resource-manager).
- Make sure that billing is enabled for your project.
- Enter your project ID in the cell below.

Note: This tutorial uses billable services but should not generate any cost (see Cloud Natural Language [free monthly thresholds](https://cloud.google.com/natural-language/pricing)).

In [None]:
# Set your Google Cloud project ID
PROJECT_ID = ""
assert PROJECT_ID, "Project ID is undefined"

!gcloud config set project $PROJECT_ID

In [None]:
# Check that gcloud is associated with your Google Cloud account (log in if necessary)
res = !gcloud config list --format "value(core.account)"
account = res[0]

if not account:
    !gcloud auth login

    res = !gcloud config list --format "value(core.account)"
    account = res[0]
    assert account, "You need to be logged in"

print(f"✔️ Logged in with account: {account}")

In [None]:
# Enable the API
# Service name: language.googleapis.com (Cloud Natural Language API)
# NOTE(review): no --project flag is passed — presumably gcloud uses the project configured earlier; confirm
!gcloud services enable language.googleapis.com

In [None]:
from pathlib import Path

# Define the path to the service account credentials
WORK_DIR = Path("/home/cloud-natural-language-python")
WORK_DIR.mkdir(exist_ok=True)
GOOGLE_APPLICATION_CREDENTIALS = f"{WORK_DIR}/key.json"

# The environment variable is used by the client libraries
%env GOOGLE_APPLICATION_CREDENTIALS = $GOOGLE_APPLICATION_CREDENTIALS

# Create the service account if necessary
SERVICE_ACCOUNT_NAME = "my-nl-sa"
SERVICE_ACCOUNT = f"{SERVICE_ACCOUNT_NAME}@{PROJECT_ID}.iam.gserviceaccount.com"

res = !gcloud iam service-accounts describe $SERVICE_ACCOUNT --format "value(email)"
if res[0] != SERVICE_ACCOUNT:
    !gcloud iam service-accounts create my-nl-sa --display-name "my nl service account"

# Download the service account credentials if necessary
credentials = Path(GOOGLE_APPLICATION_CREDENTIALS)
if not credentials.is_file():
    !gcloud iam service-accounts keys create $GOOGLE_APPLICATION_CREDENTIALS --iam-account $SERVICE_ACCOUNT

assert credentials.is_file()
print("✔️ Credentials:")
!head -n 3 $GOOGLE_APPLICATION_CREDENTIALS
print("  ...\n}")

## 1. Sentiment analysis

In [None]:
from google.cloud import language


def analyze_text_sentiment(text):
    """Run sentiment analysis on `text` and print the document-level result.

    The text is sent as a plain-text document. Three aligned lines are
    printed: the input text, then the document sentiment score and
    magnitude, both rendered as percentages with one decimal.

    Args:
        text: The content to analyze.

    Returns:
        None. The results are only printed.
    """
    service = language.LanguageServiceClient()
    plain_doc = language.Document(
        content=text,
        type_=language.Document.Type.PLAIN_TEXT,
    )

    doc_sentiment = service.analyze_sentiment(document=plain_doc).document_sentiment

    # (label, value) pairs, in display order
    rows = (
        ("text", text),
        ("score", f"{doc_sentiment.score:.1%}"),
        ("magnitude", f"{doc_sentiment.magnitude:.1%}"),
    )
    for label, value in rows:
        print(f"{label:10}: {value}")

In [None]:
# Try the sentiment analysis on a sample sentence (the function prints its results)
text = "Guido van Rossum is great!"
analyze_text_sentiment(text)

## 2. Entity analysis

In [None]:
from google.cloud import language


def analyze_text_entities(text):
    """Run entity analysis on `text` and print one record per entity found.

    The text is sent as a plain-text document. Each entity is introduced by
    an 80-character separator line and followed by its name, type name,
    salience (percentage with one decimal), and the "wikipedia_url" and
    "mid" metadata values ("-" when a key is absent).

    Args:
        text: The content to analyze.

    Returns:
        None. The results are only printed.
    """
    service = language.LanguageServiceClient()
    plain_doc = language.Document(
        content=text,
        type_=language.Document.Type.PLAIN_TEXT,
    )

    found = service.analyze_entities(document=plain_doc).entities

    separator = "=" * 80
    for item in found:
        print(separator)
        meta = item.metadata
        # (label, value) pairs, in display order
        fields = [
            ("name", item.name),
            ("type", item.type_.name),
            ("salience", f"{item.salience:.1%}"),
            ("wikipedia_url", meta.get("wikipedia_url", "-")),
            ("mid", meta.get("mid", "-")),
        ]
        for label, value in fields:
            print(f"{label:15}: {value}")

In [None]:
# Try the entity analysis on a sample sentence (the function prints one record per entity)
text = "Guido van Rossum is great, and so is Python!"
analyze_text_entities(text)

## 3. Syntax analysis

In [None]:
from google.cloud import language


def analyze_text_syntax(text):
    """Run syntax analysis on `text` and print a summary followed by the tokens.

    The text is sent as a plain-text document. The number of sentences and
    the number of tokens are printed first, then one line per token showing
    its part-of-speech tag name and its text content.

    Args:
        text: The content to analyze.

    Returns:
        None. The results are only printed.
    """
    service = language.LanguageServiceClient()
    plain_doc = language.Document(
        content=text,
        type_=language.Document.Type.PLAIN_TEXT,
    )

    analysis = service.analyze_syntax(document=plain_doc)

    def show(label, value):
        # Every output line shares the same layout: label padded to 10 columns
        print(f"{label:10}: {value}")

    show("sentences", len(analysis.sentences))
    show("tokens", len(analysis.tokens))
    for tok in analysis.tokens:
        show(tok.part_of_speech.tag.name, tok.text.content)

In [None]:
# Try the syntax analysis on a sample sentence (the function prints counts, then one line per token)
text = "Guido van Rossum is great!"
analyze_text_syntax(text)

## 4. Content classification

In [None]:
from google.cloud import language


def classify_text(text):
    """Classify `text` and print each returned category with its confidence.

    The text is sent as a plain-text document. Each category is introduced
    by an 80-character separator line, followed by the category name and
    the confidence rendered as a whole-number percentage.

    Args:
        text: The content to classify.

    Returns:
        None. The results are only printed.
    """
    service = language.LanguageServiceClient()
    plain_doc = language.Document(
        content=text,
        type_=language.Document.Type.PLAIN_TEXT,
    )

    categories = service.classify_text(document=plain_doc).categories

    divider = "=" * 80
    for cat in categories:
        print(
            divider,
            f"category  : {cat.name}",
            f"confidence: {cat.confidence:.0%}",
            sep="\n",
        )

In [None]:
# Try the content classification on a sample paragraph (the function prints each category found)
# The adjacent string literals below are concatenated into a single string
text = (
    "Python is an interpreted, high-level, general-purpose programming language. "
    "Created by Guido van Rossum and first released in 1991, "
    "Python's design philosophy emphasizes code readability "
    "with its notable use of significant whitespace."
)
classify_text(text)