In [4]:
# !pip install ibm-watson==7.* pandas
!pip install ibm-watson pandas

Collecting ibm-watson
  Downloading ibm-watson-11.0.0.tar.gz (358 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting requests<3.0,>=2.0 (from ibm-watson)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting websocket-client>=1.1.0 (from ibm-watson)
  Downloading websocket_client-1.9.0-py3-none-any.whl.metadata (8.3 kB)
Collecting ibm_cloud_sdk_core==3.*,>=3.3.6 (from ibm-watson)
  Downloading ibm_cloud_sdk_core-3.24.2-py3-none-any.whl.metadata (8.7 kB)
Collecting urllib3<3.0.0,>=2.4.0 (from ibm_cloud_sdk_core==3.*,>=3.3.6->ibm-watson)
  Using cached urllib3-2.5.0-py3-none-any.whl.metadata (6.5 kB)
Collecting PyJWT<3.0.0,>=2.10.1 (from ibm_cloud_sdk_core==3.*,>=3.3.6->ibm-watson)
  Using cached PyJWT-2.10.1-py3-none-any.whl.metadata (4.0 kB)
Collecting charset_normalizer<4,>=2 (from requests<3.0,>=2.0->ibm-watson)
  Downloading charset_n

In [6]:
!pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Downloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.1


In [1]:
import os
from collections import Counter, defaultdict
import pandas as pd
from dotenv import load_dotenv
from ibm_watson import AssistantV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator


# --- CONFIG ---


# Load environment variables from .env if present
load_dotenv()

# Read from environment (with defaults to empty string)
API_KEY = os.getenv("WA_API_KEY", "")
SERVICE_URL = os.getenv("WA_URL", "https://api.us-south.assistant.watson.cloud.ibm.com")
WORKSPACE_ID = os.getenv("WA_WORKSPACE_ID", "")

# --- CONNECT ---
authenticator = IAMAuthenticator(API_KEY)
assistant = AssistantV1(
    version="2021-06-14",
    authenticator=authenticator
)
assistant.set_service_url(SERVICE_URL)

print("Watson Assistant client initialized.")


Watson Assistant client initialized.


In [2]:
# --- TEST MESSAGE ---
response = assistant.message(
    workspace_id=WORKSPACE_ID,
    input={"text": "What flowers do you recommend for Valentine's Day?"}
).get_result()

print(response["intents"])
print(response["output"]["text"])

[{'intent': 'flower_recommendations', 'confidence': 0.977379322052002}]
['Thank you, .', 'You can never go wrong with a dozen red roses.']


In [10]:
# 🧩 SECTION: Evaluate Watson Assistant intents (Student Exercise)

# 🔁 Alias map (your old names → workspace names shown in your screenshot)
INTENT_ALIASES = {
    "recommend_flowers": "flower_recommendations",
    "hours": "hours_info",
    "store_location": "location_info",
    # keep if you truly have it in your workspace:
    "availability": "availability",  # remove if not present
}

# 📚 Test set
TEST_ITEMS = [
    ("Which flowers for Valentine's Day?", "recommend_flowers"),
    ("Are you open on Sundays?", "hours"),
    ("Where are your stores?", "store_location"),
    ("Do you have red roses in stock?", "availability"),
]

# --- CONNECT ---
# TODO: ❶ Initialize the Watson Assistant client here using API_KEY, SERVICE_URL, WORKSPACE_ID
# authenticator = ...
# assistant = ...
# assistant.set_service_url(...)

# --- DISCOVER INTENTS ---
# TODO: ❷ Retrieve intents from your workspace and print them
# workspace_intents = assistant.list_intents(...).get_result()
# actual_intent_names = ...
# print(actual_intent_names)

# --- EVALUATION LOOP ---
rows = []

# Define helper function for alias matching
def is_alias_match(expected, predicted):
    """Return True if predicted equals expected OR equals alias(expected)."""
    return predicted == expected or predicted == INTENT_ALIASES.get(expected, None)

for text, expected in TEST_ITEMS:
    # TODO: ❸ Send message to the Assistant API and capture top intent and confidence
    # resp = assistant.message(...).get_result()
    # top_intent = ...
    # conf = ...
    
    # --- student complete here ---
    strict_ok = ___   # True if top_intent == expected
    alias_ok  = ___   # True if is_alias_match(expected, top_intent)
    
    rows.append({
        "utterance": text,
        "expected_intent": expected,
        "predicted_intent": top_intent,
        "confidence": round(conf, 3),
        "strict_correct": strict_ok,
        "alias_correct": alias_ok
    })

df = pd.DataFrame(rows)

# --- METRICS (students complete this section) ---
N = len(df)

# TODO: ❹ Compute strict accuracy and alias-adjusted accuracy
# strict_acc = ...
# alias_acc  = ...

print("\n=== Watson Assistant Evaluation ===")
print(f"Samples: {N}")
print(f"Strict Accuracy:        {strict_acc:.2%}")
print(f"Alias-Adjusted Accuracy:{alias_acc:.2%}\n")

# TODO: ❺ Compute a simple confusion matrix (optional bonus)
# Hint: use pd.crosstab(df["expected_intent"], df["predicted_intent"])
# cm = ...
# print(cm)

# --- Reflection (students discuss in class) ---
# ✅ What does the difference between strict and alias accuracy tell you?
# ✅ When would alias-adjusted accuracy be a better indicator?
# ✅ How could you extend this evaluation to include precision/recall or F1?


=== Watson Assistant Evaluation ===
Samples: 4
Strict Accuracy:        0.00%
Alias-Adjusted Accuracy:86.67%

