In [1]:
import os
import sys

# Put the notebook's parent directory on the import path so modules that
# live one level up can be imported from this notebook.
sys.path.append(os.path.abspath(os.pardir))

#### Imports

In [13]:
import requests
import json
import time
from datetime import timedelta
import pandas as pd

In [9]:
# Read the test set: a JSON list of records, each carrying a "text"
# utterance and its labelled "intent".
with open("../../dataset/test.json", encoding="utf-8") as f:
    test_data = json.loads(f.read())

# Show the first two records as a quick sanity check.
test_data[:2]

[{'text': 'does village inn let you make reservations',
  'intent': 'accept_reservations'},
 {'text': 'can i make a reservation at chima steakhouse in chicago',
  'intent': 'accept_reservations'}]

#### Config and utils

In [None]:

# url = "http://localhost:5005/model/parse"

# payload = json.dumps({
#   "text": "who is the one who programmed you"
# })
# headers = {
#   'Content-Type': 'application/json'
# }

# response = requests.request("POST", url, headers=headers, data=payload)

# print(response.text)




{"text":"who is the one who programmed you","intent":{"name":"who_made_you","confidence":0.8705620765686035},"entities":[],"text_tokens":[[0,3],[4,6],[7,10],[11,14],[15,18],[19,29],[30,33]],"intent_ranking":[{"name":"who_made_you","confidence":0.8705620765686035},{"name":"who_do_you_work_for","confidence":0.009470746852457523},{"name":"w2","confidence":0.006508230697363615},{"name":"weather","confidence":0.006414706353098154},{"name":"improve_credit_score","confidence":0.006323659792542458},{"name":"card_declined","confidence":0.006265787407755852},{"name":"pay_bill","confidence":0.004768742248415947},{"name":"what_is_your_name","confidence":0.004008719231933355},{"name":"gas","confidence":0.003962547518312931},{"name":"what_can_i_ask_you","confidence":0.003872878151014447}],"response_selector":{"all_retrieval_intents":[],"default":{"response":{"responses":null,"confidence":0.0,"intent_response_key":null,"utter_action":"utter_None"},"ranking":[]}}}


In [14]:
def safe_rasa_invoke(
    text,
    retries=3,
    base_wait=1.0,
    url="http://localhost:5005/model/parse",
    timeout=10.0,
):
    """Send ``text`` to the Rasa parse endpoint and return the predicted intent name.

    The call is best-effort: it never raises for transport, HTTP or decoding
    problems. Instead it retries and finally reports failure through the
    sentinel string ``"ERROR"``.

    Args:
        text: Utterance to classify.
        retries: Maximum number of attempts. Values below 1 are treated as 1
            so the function always makes at least one request and always
            returns a string.
        base_wait: Seconds to wait after the first failed attempt; each
            further failure waits an extra 0.5 s.
        url: Parse endpoint to POST to.
        timeout: Per-request timeout in seconds passed to ``requests.post``.
            Without it a stalled server would block the caller forever.

    Returns:
        The intent name from the response, ``"UNKNOWN"`` when the response
        carries no usable intent name (missing or null ``intent`` / ``name``),
        or ``"ERROR"`` when every attempt failed.
    """
    payload = json.dumps({"text": text})
    headers = {"Content-Type": "application/json"}
    attempts = max(1, retries)

    for attempt in range(attempts):
        try:
            response = requests.post(
                url, headers=headers, data=payload, timeout=timeout
            )
            # Anything other than 200 is treated as a failed attempt.
            if response.status_code != 200:
                raise Exception(f"Status {response.status_code}")
            data = response.json()
        except Exception as e:
            if attempt < attempts - 1:
                # Linearly growing back-off before the next attempt.
                time.sleep(base_wait + 0.5 * attempt)
                continue
            print(f"Failed after retries: {e}")
            return "ERROR"

        # A well-formed reply is final: a missing/null intent is not a
        # transport failure, so it maps to "UNKNOWN" instead of being retried.
        intent = data.get("intent") if isinstance(data, dict) else None
        name = intent.get("name") if isinstance(intent, dict) else None
        return name or "UNKNOWN"

In [15]:
def _dump_json(obj, path):
    """Write ``obj`` to ``path`` as indented UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(obj, f, indent=2, ensure_ascii=False)


def run_rasa_evaluation(
    test_data, sleep_time=1.0, verbose=False, save_path=None, checkpoint_every=1
):
    """Classify every item in ``test_data`` and collect predictions and timing.

    Args:
        test_data: Sequence of dicts with a ``"text"`` utterance and its
            expected ``"intent"``.
        sleep_time: Seconds to pause before each request (not counted in the
            per-item latency).
        verbose: When true, print a progress line with an ETA per item.
        save_path: Optional path prefix for outputs. When given, the function
            writes ``<prefix>_partial.csv`` while running and
            ``<prefix>_final.csv``, ``<prefix>_stats.json`` and (if any item
            failed) ``<prefix>_error.json`` at the end. Missing parent
            directories are created.
        checkpoint_every: Write the partial outputs every this many items
            (the last item always triggers a write). The default of 1 keeps
            the write-after-every-item behaviour; larger values avoid
            rewriting the whole CSV on each iteration. Values below 1 are
            treated as 1.

    Returns:
        ``(results, stats)``: ``results`` has one dict per input item with
        keys ``text``, ``expected``, ``predicted`` and ``latency_sec``;
        ``stats`` holds total time, average latency and throughput figures.

    Raises:
        KeyError: If an item lacks ``"text"`` or ``"intent"``.
        OSError: If an output file cannot be written.
    """
    results = []
    errors = []
    step = max(1, int(checkpoint_every))

    if save_path:
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    total_start = time.time()
    total = len(test_data)

    for i, item in enumerate(test_data):
        user_input = item["text"]
        expected = item["intent"]

        # Only the request itself is guarded, so a later write failure cannot
        # cause a second result row to be appended for the same item.
        try:
            time.sleep(sleep_time)
            iter_start = time.time()
            predicted_intent = safe_rasa_invoke(user_input)
            latency = round(time.time() - iter_start, 3)
        except Exception:
            predicted_intent = "ERROR"
            latency = 0

        # safe_rasa_invoke signals failure by returning the "ERROR" sentinel
        # rather than raising, so failures are detected on the value.
        failed = predicted_intent == "ERROR"
        if failed:
            errors.append(item)

        results.append({
            "text": user_input,
            "expected": expected,
            "predicted": predicted_intent,
            "latency_sec": latency,
        })

        if verbose:
            done = i + 1
            elapsed = time.time() - total_start
            percent = 100 * done / total
            eta = timedelta(seconds=int((elapsed / done) * (total - done)))
            marker = "❌" if failed else "✅"
            print(f"[{done}/{total}] {marker} {percent:.2f}% - {round(latency, 2)}s - ETA: {eta}")

        if save_path and ((i + 1) % step == 0 or i + 1 == total):
            pd.DataFrame(results).to_csv(f"{save_path}_partial.csv", index=False)
            if errors:
                _dump_json(errors, f"{save_path}_error.json")

    total_duration = time.time() - total_start
    n_responses = len(results)
    sum_latencies = sum(r["latency_sec"] for r in results)

    stats = {
        "total_time_sec": round(total_duration, 3),
        "avg_latency_sec_inference": round(sum_latencies / n_responses, 3) if n_responses else 0,
        "throughput_inference": round(n_responses / sum_latencies, 3) if sum_latencies else 0,
        "throughput_pipeline": round(n_responses / total_duration, 3) if total_duration else 0,
    }

    if save_path:
        pd.DataFrame(results).to_csv(f"{save_path}_final.csv", index=False)
        _dump_json(stats, f"{save_path}_stats.json")
        if errors:
            _dump_json(errors, f"{save_path}_error.json")

    return results, stats

In [20]:
RASA_BASE_PATH = "../../results/rasa"

# Run the full test set without pausing between requests; outputs are
# written under RASA_BASE_PATH using the "intent_eval" file prefix.
results, stats = run_rasa_evaluation(
    test_data,
    sleep_time=0,
    verbose=False,
    save_path=RASA_BASE_PATH + "/intent_eval",
)

print("Rasa stats:", stats)

Rasa stats: {'total_time_sec': 66.111, 'avg_latency_sec_inference': 0.008, 'throughput_inference': 131.768, 'throughput_pipeline': 68.068}
