# Lab on the integration of Natural Language Understanding (NLU)

### NLU documentation: https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-about NLU API reference: https://cloud.ibm.com/apidocs/natural-language-understanding?code=python#introduction
## Python SDK for Cloud Object Storage: https://cloud.ibm.com/docs/cloud-object-storage?topic=cloud-object-storage-python

In [None]:
# list all the installed modules in the environment
! pip freeze

In [2]:
# install required module 
! pip install --upgrade "ibm-watson>=8.0.0"

Collecting ibm-watson>=8.0.0
  Downloading ibm_watson-10.0.0.tar.gz (359 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m359.4/359.4 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting websocket-client>=1.1.0 (from ibm-watson>=8.0.0)
  Downloading websocket_client-1.8.0-py3-none-any.whl.metadata (8.0 kB)
Downloading websocket_client-1.8.0-py3-none-any.whl (58 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: ibm-watson
  Building wheel for ibm-watson (pyproject.toml) ... [?25ldone
[?25h  Created wheel for ibm-watson: filename=ibm_watson-10.0.0-py3-none-any.whl size=361969 sha256=3edde122a2198cca9f6b8097e8bb260b293fc55c098dc4d5fbbac229337b7e13
  Stored in dire

In [3]:
# authentication 

# import relevant functions
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Authenticate with the instance of NLU provisioned on IBM Cloud.
# The API key is a secret, so it is not written in the notebook: it is taken
# from the NLU_APIKEY environment variable, and when that is not set the
# notebook asks for it with a hidden prompt.
import os
from getpass import getpass

APIKEY = os.environ.get("NLU_APIKEY") or getpass("NLU API key: ")
NLU_VERSION = "2022-04-07"
NLU_URL = "https://api.eu-de.natural-language-understanding.watson.cloud.ibm.com/instances/d6531fd0-e98d-4748-a238-1f5b408f88e5"

authenticator = IAMAuthenticator(APIKEY)
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version=NLU_VERSION,
    authenticator=authenticator,
)

# point the client at the URL of this specific service instance
natural_language_understanding.set_service_url(NLU_URL)

In [13]:
# Use NLU to analyze a particular text

# import the functions to prepare the payload expected by the service
from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions

# Describe what NLU should extract: entities and keywords, each requested
# with emotion, sentiment and limit=2.
text_features = Features(
    entities=EntitiesOptions(emotion=True, sentiment=True, limit=2),
    keywords=KeywordsOptions(emotion=True, sentiment=True, limit=2),
)

# Send the text to the service, then keep what get_result() returns in "response".
text_analysis = natural_language_understanding.analyze(
    text='Domani ho un esame di cloud computing e sono pronto a prendere un bel 30 e lode',
    features=text_features,
)
response = text_analysis.get_result()

In [None]:
# access the results stored in the "response" variable
import json

# serialize the response as JSON indented by two spaces, then show it
pretty_response = json.dumps(response, indent=2)
print(pretty_response)

## Customize NLU request

In [7]:
# Use NLU to 
# 1) analyze a web page
# 2) analyze the sentiment of the page
# 3) increase the number of results returned (keywords and entities)


# import the functions to prepare the payload expected by the service
from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions, SentimentOptions

# 1) changed the "text" param to the "url" param
# 2) sentiment: added the option SentimentOptions and inserted the "sentiment" param inside the "features" part of the request
# 3) removed emotion and sentiment from the keywords and entities requests but increased the "limit" param value

TARGET_URL = 'https://en.wikipedia.org/wiki/Fall_of_Constantinople'

# Features requested for the page: entities (limit=5), keywords (limit=10)
# and the sentiment of the whole document.
page_features = Features(
    entities=EntitiesOptions(limit=5),
    keywords=KeywordsOptions(limit=10),
    sentiment=SentimentOptions(document=True),
)

# This time the service receives a URL instead of a text.
page_analysis = natural_language_understanding.analyze(
    url=TARGET_URL,
    features=page_features,
)
response = page_analysis.get_result()

# access the results stored in the "response" variable
import json

# build the indented JSON text first, then print it
response_as_json = json.dumps(response, indent=2)
print(response_as_json)

{
  "usage": {
    "text_units": 5,
    "text_characters": 50000,
    "features": 3
  },
  "sentiment": {
    "document": {
      "score": -0.449239,
      "mixed": "1",
      "label": "negative"
    }
  },
  "retrieved_url": "https://en.wikipedia.org/wiki/Fall_of_Constantinople",
  "language": "en",
  "keywords": [
    {
      "text": "21-year-old Sultan Mehmed II",
      "relevance": 0.59046,
      "count": 1
    },
    {
      "text": "1st Constantinople",
      "relevance": 0.585504,
      "count": 1
    },
    {
      "text": "Ottoman Empire",
      "relevance": 0.5578,
      "count": 2
    },
    {
      "text": "Emperor Constantine XI",
      "relevance": 0.549606,
      "count": 2
    },
    {
      "text": "cost of many struggles",
      "relevance": 0.54732,
      "count": 1
    },
    {
      "text": "captains of the Venetian ships",
      "relevance": 0.542732,
      "count": 1
    },
    {
      "text": "2nd Constantinople",
      "relevance": 0.540864,
      "count": 1
  

## Manipulate the results in readable format

Recreate the following output:

"the web page analyzed is *URL*
Inside the web page NLU detected the following *NUM_KEYWORDS* most relevant keywords:
- keyword n1
- keyword n2"


In [8]:
# Count the keywords, collect their text and print the report

TARGET_URL = "https://en.wikipedia.org/wiki/Fall_of_Constantinople"

# option number 1: ask the list for its length
NUM_KEYWORDS = len(response["keywords"])

# option number 2: add one for every element met while walking the list
NUM_KEYWORDS = sum(1 for _ in response["keywords"])

print("NUM_KEYWORDS", NUM_KEYWORDS)

# keep only the "text" field of every keyword
keywords_list = [entry["text"] for entry in response["keywords"]]

print(keywords_list)

# header of the report, followed by one bullet per keyword
print("the web page analyzed is", TARGET_URL)
print("inside the web page NLU detected the following", NUM_KEYWORDS, "most relevant keywords:")
for keyword_text in keywords_list:
    print(f"- {keyword_text}")

NUM_KEYWORDS 10
['21-year-old Sultan Mehmed II', '1st Constantinople', 'Ottoman Empire', 'Emperor Constantine XI', 'cost of many struggles', 'captains of the Venetian ships', '2nd Constantinople', 'attacking Ottoman Army', 'new Ottoman capital', 'part of the culmination of a 55-day siege']
the web page analyzed is https://en.wikipedia.org/wiki/Fall_of_Constantinople
inside the web page NLU detected the following 10 most relevant keywords:
- 21-year-old Sultan Mehmed II
- 1st Constantinople
- Ottoman Empire
- Emperor Constantine XI
- cost of many struggles
- captains of the Venetian ships
- 2nd Constantinople
- attacking Ottoman Army
- new Ottoman capital
- part of the culmination of a 55-day siege


## Store the results inside a remote storage

We will use the IBM Cloud Object Storage service to persist the results of the requests we send to NLU.

In [9]:
# install dependencies
! pip install ibm-cos-sdk



In [10]:
# authenticate with the service
import ibm_boto3
from ibm_botocore.client import Config, ClientError

# Constants for IBM COS values.
# The API key is a secret, so it is not written in the notebook: it is taken
# from the COS_API_KEY_ID environment variable, and when that is not set the
# notebook asks for it with a hidden prompt.
import os
from getpass import getpass

COS_ENDPOINT = "https://s3.eu-de.cloud-object-storage.appdomain.cloud" # Current list available at https://control.cloud-object-storage.cloud.ibm.com/v2/endpoints
COS_API_KEY_ID = os.environ.get("COS_API_KEY_ID") or getpass("COS API key: ")
COS_INSTANCE_CRN = "crn:v1:bluemix:public:cloud-object-storage:global:a/0d4bedc2778c435daa08b1c8f5fbe1f5:0e78fafe-555f-41df-97d8-9ba5c98e2c60::" # eg "crn:v1:bluemix:public:cloud-object-storage:global:a/3bf0d9003xxxxxxxxxx1c3e97696b71c:d6f04d83-6c4f-4a62-a165-696756d63903::"

# Create client
cos_client = ibm_boto3.client("s3",
    ibm_api_key_id=COS_API_KEY_ID,
    ibm_service_instance_id=COS_INSTANCE_CRN,
    config=Config(signature_version="oauth"),
    endpoint_url=COS_ENDPOINT
)

In [11]:
# example: write a small text object into the bucket

FILE_NAME = "test.txt"
FILE_CONTENT = "This is a text file used to text the integration with cloud object storage"
BUCKET_NAME = "nlu-responses-ma"

# gather the arguments of the request first, so the call itself stays short
put_request = {
    "Bucket": BUCKET_NAME,
    "Key": FILE_NAME,
    "Body": FILE_CONTENT,
}

print("Creating new item: {0}".format(FILE_NAME))
try:
    cos_client.put_object(**put_request)
    print("Item: {0} created!".format(FILE_NAME))
except ClientError as client_error:
    # error raised by the COS client library
    print("CLIENT ERROR: {0}\n".format(client_error))
except Exception as other_error:
    # anything else that went wrong while creating the object
    print("Unable to create text file: {0}".format(other_error))

Creating new item: test.txt
Item: test.txt created!


In [12]:
# define a re-usable function for cloud object storage

def upload_file(file_name, file_content, bucket_name="nlu-responses-ma", client=None):
    """Store ``file_content`` in Cloud Object Storage under the key ``file_name``.

    Args:
        file_name: Key of the object to create inside the bucket.
        file_content: Value sent as the body of the object.
        bucket_name: Bucket that receives the object; the default is the
            bucket this function previously had hard-coded.
        client: Object exposing ``put_object``. When omitted, the
            notebook-level ``cos_client`` is used.

    Returns:
        True when ``put_object`` completed, False when it raised. Errors are
        printed instead of propagated, so a failed upload does not stop the
        notebook, but the caller can still check the outcome.
    """
    if client is None:
        # resolved at call time, so the client cell only has to run before the first upload
        client = cos_client

    print("Creating new item: {0}".format(file_name))
    try:
        client.put_object(
            Bucket=bucket_name,
            Key=file_name,
            Body=file_content
        )
    except ClientError as be:
        print("CLIENT ERROR: {0}\n".format(be))
        return False
    except Exception as e:
        print("Unable to create text file: {0}".format(e))
        return False
    else:
        print("Item: {0} created!".format(file_name))
        return True

In [13]:
# define a function for NLU

def analyze_webpage(webpage_url, entities_limit=5, keywords_limit=10):
    """Ask NLU for the entities, keywords and document sentiment of a web page.

    Args:
        webpage_url: Address of the page, passed to the service as ``url``.
        entities_limit: Value given as ``limit`` to ``EntitiesOptions``.
        keywords_limit: Value given as ``limit`` to ``KeywordsOptions``.

    Returns:
        What ``get_result()`` returns for the request, or an empty dict when
        building or sending the request raised. The error is printed, not
        propagated, so callers should check for an empty result.
    """
    print("Starting to analyze webpage", webpage_url)
    try:
        requested_features = Features(
            entities=EntitiesOptions(limit=entities_limit),
            keywords=KeywordsOptions(limit=keywords_limit),
            sentiment=SentimentOptions(document=True),
        )
        return natural_language_understanding.analyze(
            url=webpage_url,
            features=requested_features,
        ).get_result()
    except Exception as e:
        # name the step that actually failed (the analysis, not a file upload)
        print("Unable to analyze webpage: {0}".format(e))
        return {}

In [14]:
# define a function to format the results in a string

def format_results(results_nlu):
    """Turn an NLU result into the plain-text report used in this notebook.

    Args:
        results_nlu: Dict from which "retrieved_url" and "keywords" are read.
            Each keyword is expected to be a dict with a "text" entry. Either
            top-level key may be missing (for example the empty dict returned
            when an analysis failed): the URL is then shown as "(unknown)"
            and the keyword list is treated as empty.

    Returns:
        A string made of a line naming the analyzed page, a line with the
        number of keywords, and one "- keyword" line per keyword; every line
        ends with a newline.
    """
    print("Counting number of keywords")
    keywords = results_nlu.get("keywords", [])
    NUM_KEYWORDS = len(keywords)
    print("Listing keywords...")
    keywords_list = [keyword["text"] for keyword in keywords]
    print("Extraction completed")
    print("Preparing string of text")
    report_lines = [
        "the web page analyzed is " + results_nlu.get("retrieved_url", "(unknown)"),
        "inside the web page NLU detected the following " + str(NUM_KEYWORDS) + " most relevant keywords:",
    ]
    # "- " (with a space) matches the bullet layout shown as the target output
    report_lines.extend("- " + k for k in keywords_list)
    return "\n".join(report_lines) + "\n"

# Test the workflow created

In [17]:
URL_TO_SEARCH = "https://www.iea.org/reports/world-energy-investment-2025/how-we-track-investment-in-energy"
FILENAME = "query01.txt"

result_dict = analyze_webpage(URL_TO_SEARCH)

# analyze_webpage returns an empty dict when the request fails; only format
# and upload when there is something to report
if result_dict:
    result_text = format_results(result_dict)
    upload_file(FILENAME, result_text)
else:
    print("No NLU results for {0}: nothing was uploaded".format(URL_TO_SEARCH))

Starting to analyze webpage https://www.iea.org/reports/world-energy-investment-2025/how-we-track-investment-in-energy
Counting number of keywords
Listing keywords...
Extraction completed
Preparing string of text
Creating new item: query01.txt
Item: query01.txt created!
