In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Content Moderation with NLP API

Based on [Using the Natural Language API with Python.ipynb](https://github.com/GoogleCloudPlatform/devrel-demos/blob/main/other/colab/Using%20the%20Natural%20Language%20API%20with%20Python.ipynb)

![content moderation architecture](images/1.4-content-moderation.png)

The [Natural Language API](https://cloud.google.com/natural-language/docs/) lets you extract information from unstructured text using Google machine learning. In this tutorial, you'll focus on using its Python client library to perform the following:

- Sentiment analysis
- Entity analysis
- Syntax analysis
- Content classification
- Text moderation

This notebook requires a Google Cloud project:

- If needed, [create a new Google Cloud project](https://console.cloud.google.com/cloud-resource-manager).
- Make sure that billing is enabled for your project.
- It uses billable services but should not generate any cost (see the Natural Language API [free monthly thresholds](https://cloud.google.com/natural-language/pricing)).

## Setup

### Install packages

Install the Natural Language API package and the notebook packages and then restart the session.

In [None]:
! pip install --upgrade --quiet pandas==1.5.3 Jinja2==3.1.2 ipywidgets==7.7.1 google-cloud-language==2.11.0

In [None]:
import sys

In [None]:
# Colab is detected by checking whether the "google.colab" module is already loaded
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    # Imported only when running in Colab; `colab_auth` is used further down to
    # authenticate the user
    from google.colab import auth as colab_auth

In [None]:
def gcloud(command: str) -> list[str]:
    command = command.replace("'", '"')
    lines = !gcloud $command
    if any("ERROR:" in line for line in lines):
        raise StopExecution(lines.nlstr)
    return lines.list

def get_active_account() -> str:
    """Return the `core.account` value of the gcloud configuration ("" if none)."""
    output = gcloud("config list --format 'value(core.account)'")
    if not output:
        return ""
    return output[0]


def is_authenticated() -> bool:
    """Tell whether the active gcloud account is neither empty nor "default"."""
    account = get_active_account()
    return account != "" and account != "default"

# Prompt for credentials only when no account is active yet
authenticated = is_authenticated()
if not authenticated:
    print(f"🔑 Authenticate to access your Google Cloud services")
    if not running_in_colab:
        gcloud("auth login --brief")
    else:
        colab_auth.authenticate_user()
    # Re-check the active account after the login attempt
    authenticated = is_authenticated()

if not authenticated:
    raise StopExecution("❌ Could not authenticate")
print(f"✔️ Authenticated")

In [None]:
def get_default_project_id() -> str:
    """Return the `core.project` value of the gcloud configuration ("" if none)."""
    output = gcloud("config list --format 'value(core.project)'")
    if not output:
        return ""
    return output[0]


def get_active_project_ids() -> list[str]:
    """Return the project IDs printed by `gcloud projects list`, one per item."""
    project_ids = gcloud("projects list --format 'value(projectId)'")
    return project_ids


def update_project_id(project_id: str | None = None):
    """Make `project_id` the current project and print it.

    When the ID differs from the global `PROJECT_ID`, the global is updated and
    credentials are refreshed: through the Colab auth helper when running in
    Colab, otherwise by revoking and re-creating the gcloud Application Default
    Credentials and setting the gcloud default project.

    Args:
        project_id: The ID of the project to use.

    Raises:
        StopExecution: If `project_id` is empty or None.
    """
    global PROJECT_ID
    if not project_id:
        raise StopExecution("❌ Please select your project")

    if PROJECT_ID != project_id:
        PROJECT_ID = project_id
        if not running_in_colab:
            # (progress message, gcloud command) pairs, executed in order
            local_steps = (
                (
                    "⚙️ Revoking Application Default Credentials…",
                    "auth application-default revoke --quiet",
                ),
                (
                    "⚙️ Setting Application Default Credentials…",
                    f"auth application-default login --project {PROJECT_ID}",
                ),
                (
                    "⚙️ Setting default project…",
                    f"config set project {PROJECT_ID}",
                ),
            )
            for message, command in local_steps:
                print(message)
                gcloud(command)
        else:
            colab_auth.authenticate_user(project_id=PROJECT_ID)
    print(f"✔️ PROJECT_ID: {PROJECT_ID}")


def show_project_ids(project_ids: list[str]):
    """Display an interactive widget to choose a project among `project_ids`.

    Each selection is forwarded to `update_project_id`.
    """
    from ipywidgets import interact

    # The parameter name must match the keyword passed to `interact` below
    def _on_selection(PROJECT_ID: str | None = None):
        update_project_id(PROJECT_ID)

    interact(_on_selection, PROJECT_ID=project_ids)


# Prefer the gcloud default project; otherwise fall back to the listed projects
PROJECT_ID = get_default_project_id()
if PROJECT_ID:
    update_project_id(PROJECT_ID)
else:
    project_ids = get_active_project_ids()
    if len(project_ids) != 1:
        # Zero or several projects: let the user choose
        show_project_ids(project_ids)
    else:
        # Single project (e.g. onboarding developer)
        update_project_id(project_ids[0])

In [None]:
def get_enabled_services() -> list[str]:
    """Return the `config.name` of each service listed by `gcloud services list --enabled`."""
    service_names = gcloud("services list --enabled --format 'value(config.name)'")
    return service_names


if not PROJECT_ID:
    raise StopExecution("❌ PROJECT_ID is undefined")

# Services this notebook depends on
required_services = [
    "language.googleapis.com",
]
enabled_services = get_enabled_services()
# Only request the services that are not enabled yet, so that re-running this
# cell does not issue a needless `gcloud services enable` call
services_to_enable = [
    service for service in required_services if service not in enabled_services
]
if services_to_enable:
    api_or_apis = "APIs" if 1 < len(services_to_enable) else "API"
    spaced_services = " ".join(services_to_enable)
    print(f'🔓 Enabling {api_or_apis} "{spaced_services}"…')
    gcloud(f"services enable {spaced_services}")
    # Refresh the list to verify the outcome below
    enabled_services = get_enabled_services()

# Check every required service, including those that were already enabled
for service in required_services:
    if service in enabled_services:
        print(f'✔️ API "{service}" is enabled')
    else:
        raise StopExecution(f'❌ Failed to enable API "{service}"')

In [None]:
import pandas as pd
from IPython.display import display


def show_table(columns, data, formats=None, remove_empty_columns=False):
    """Display rows as a styled table with a hidden index.

    Args:
        columns: The column names.
        data: The rows, in any form accepted by `pd.DataFrame(data=...)`.
        formats: Optional formatting spec passed to `Styler.format`.
        remove_empty_columns: If True, drop the columns whose values all equal "".
    """
    table = pd.DataFrame(data=data, columns=columns)
    if remove_empty_columns:
        blank_columns = [name for name in table if table[name].eq("").all()]
        table = table.drop(columns=blank_columns)

    # Customize formatting
    styler = table.style
    if formats:
        styler.format(formats)

    # Left-align string columns (detected after dtype conversion)
    typed_table = table.convert_dtypes()
    text_columns = list(typed_table.select_dtypes("string").keys())
    styler = styler.set_properties(subset=text_columns, **{"text-align": "left"})

    # Center headers
    header_style = {"selector": "th", "props": [("text-align", "center")]}
    styler.set_table_styles([header_style])

    styler.hide()
    display(styler)


print(f"✔️ Helpers defined")

## Using the Python client library

You can use the Natural Language API in Python with the client library `google-cloud-language` and the following import:

In [None]:
from google.cloud import language_v1 as language

## Text Moderation

Text moderation identifies a wide range of harmful content, including hate speech, bullying, and sexual harassment. It is performed with the `moderate_text` method which returns a `ModerateTextResponse`.

In [None]:
from google.cloud import language_v1 as language


def moderate_text(text: str) -> language.ModerateTextResponse:
    """Send `text` as a plain-text document to the `moderate_text` API method.

    Args:
        text: The content to moderate.

    Returns:
        The response returned by `LanguageServiceClient.moderate_text`.
    """
    service = language.LanguageServiceClient()
    plain_text_document = language.Document(
        content=text,
        type_=language.Document.Type.PLAIN_TEXT,
    )
    response = service.moderate_text(document=plain_text_document)
    return response


def show_text_moderation(response: language.ModerateTextResponse):
    """Display the moderation categories of `response`, highest confidence first.

    The confidence column is formatted as a whole percentage.
    """
    ranked_categories = sorted(
        response.moderation_categories,
        key=lambda category: category.confidence,
        reverse=True,
    )
    rows = ((category.name, category.confidence) for category in ranked_categories)
    show_table(
        ["category", "confidence"],
        rows,
        {"confidence": "{:.0%}"},
    )

In [None]:
# Input: the text to moderate
# NOTE(review): the trailing "@param" annotation looks like a Colab form-field
# marker — keep it unchanged
text = "I have to read Ulysses by James Joyce and am a little over halfway through. I hate it. What a pile of garbage!"  # @param {type:"string"}

# Send a request to the API
moderate_text_response = moderate_text(text)

# Show the results: one row per moderation category, sorted by decreasing confidence
show_text_moderation(moderate_text_response)

For more information, see the following docs:

- [`ModerateTextResponse`](https://cloud.google.com/python/docs/reference/language/latest/google.cloud.language_v1.types.ModerateTextResponse)
- [Language Support](https://cloud.google.com/natural-language/docs/languages#content_classification)
- [Moderating Text](https://cloud.google.com/natural-language/docs/moderating-text)