# Chapter 11: Evaluating LLM Outputs

In [1]:
import re
import numpy as np
import dirtyjson as json
from typing import Any
from pydantic import BaseModel, EmailStr, Field, ValidationError
from datetime import datetime
from language_models.agent import Agent, OutputType, PromptingStrategy
from language_models.models.llm import OpenAILanguageModel, ChatMessage, ChatMessageRole
from language_models.models.embedding import SentenceTransformerEmbeddingModel
from language_models.proxy_client import ProxyClient
from language_models.settings import settings

In [2]:
# Connection details for the proxy client, all read from the project `settings` object
# so that no credential is written into the notebook itself.
proxy_client_options = {
    "client_id": settings.CLIENT_ID,
    "client_secret": settings.CLIENT_SECRET,
    "auth_url": settings.AUTH_URL,
    "api_base": settings.API_BASE,
}
proxy_client = ProxyClient(**proxy_client_options)

In [3]:
# Generation settings for the language model that the agents below are built on.
MODEL_NAME = "gpt-4"
MAX_TOKENS = 500
TEMPERATURE = 0.2

llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model=MODEL_NAME,
    max_tokens=MAX_TOKENS,
    temperature=TEMPERATURE,
)

**String**

In [4]:
# Agent configured for OutputType.STRING. The user template consists solely of the
# "{prompt}" placeholder, which is filled from the "prompt" key passed to invoke().
system_prompt = "You are an AI assistant designed to help users with a variety of tasks."
prompt_template = "{prompt}"
template_variables = ["prompt"]

agent = Agent.create(
    llm=llm,
    prompting_strategy=PromptingStrategy.SINGLE_COMPLETION,
    output_type=OutputType.STRING,
    system_prompt=system_prompt,
    prompt=prompt_template,
    prompt_variables=template_variables,
    verbose=True,
)

In [5]:
# Ask a question with a single unambiguous answer so the result can be compared exactly.
output = agent.invoke({"prompt": "What is the capital city of France?"})

[1m[38;2;50;164;103mFinal Answer[0m[1m[0m: Paris


In [6]:
# Display the final answer stored on the agent's output object.
print(output.final_answer)

Paris


In [7]:
# Exact-match evaluation: True only when the answer equals the string "Paris" exactly.
print(output.final_answer == "Paris")

True


In [8]:
# Semantic evaluation: embed the answer and the reference text with the same model,
# then score how closely the two vectors point in the same direction.
embedding_model = SentenceTransformerEmbeddingModel(model="all-MiniLM-L6-v2")
embedding1 = embedding_model.embed_query(output.final_answer)
embedding2 = embedding_model.embed_query("Paris")

# cosine(a, b) = dot(a, b) / (norm(a) * norm(b))
dot_product = np.dot(embedding1, embedding2)
norm_product = np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
cosine_similarity = dot_product / norm_product
print(f"Cosine similarity: {cosine_similarity:.4f}")

Cosine similarity: 1.0000


**Integer/Float**

In [9]:
# Agent configured for OutputType.FLOAT; everything else matches the string agent above.
system_prompt = "You are an AI assistant designed to help users with a variety of tasks."
prompt_template = "{prompt}"
template_variables = ["prompt"]

agent = Agent.create(
    llm=llm,
    prompting_strategy=PromptingStrategy.SINGLE_COMPLETION,
    output_type=OutputType.FLOAT,
    system_prompt=system_prompt,
    prompt=prompt_template,
    prompt_variables=template_variables,
    verbose=True,
)

In [10]:
# Ask for a numeric value with a fixed precision so the expected answer is well defined.
output = agent.invoke({"prompt": "What is the value of Pi up to two decimal places?"})

[1m[38;2;50;164;103mFinal Answer[0m[1m[0m: 3.14


In [11]:
# Display the final answer stored on the agent's output object.
print(output.final_answer)

3.14


In [12]:
# Exact-match evaluation against the float literal 3.14 (no tolerance applied).
print(output.final_answer == 3.14)

True


In [13]:
# Tolerance-band evaluation: accept any answer strictly between the two bounds.
lower_bound, upper_bound = 3.13, 3.15
print(lower_bound < output.final_answer < upper_bound)

True


**Boolean**

In [14]:
# Agent configured for OutputType.BOOLEAN, used for yes/no style questions.
system_prompt = "You are an AI assistant designed to help users with a variety of tasks."
prompt_template = "{prompt}"
template_variables = ["prompt"]

agent = Agent.create(
    llm=llm,
    prompting_strategy=PromptingStrategy.SINGLE_COMPLETION,
    output_type=OutputType.BOOLEAN,
    system_prompt=system_prompt,
    prompt=prompt_template,
    prompt_variables=template_variables,
    verbose=True,
)

In [15]:
# Ask a question whose answer is a plain true/false statement.
output = agent.invoke({"prompt": "Is the number 5 greater than 3?"})

[1m[38;2;50;164;103mFinal Answer[0m[1m[0m: True


In [16]:
# Display the final answer stored on the agent's output object.
print(output.final_answer)

True


In [17]:
# Exact-match evaluation for a boolean answer. Identity (`is True`) is used rather
# than `== True` because values such as 1 or 1.0 compare equal to True and would
# otherwise be accepted as a correct boolean answer.
print(output.final_answer is True)

True


**Date/Timestamp**

In [18]:
# Agent configured for OutputType.DATE; the strftime-style pattern is passed as the
# output schema.
system_prompt = "You are an AI assistant designed to help users with a variety of tasks."
prompt_template = "{prompt}"
template_variables = ["prompt"]
date_format = "%Y-%m-%d"

agent = Agent.create(
    llm=llm,
    prompting_strategy=PromptingStrategy.SINGLE_COMPLETION,
    output_type=OutputType.DATE,
    output_schema=date_format,
    system_prompt=system_prompt,
    prompt=prompt_template,
    prompt_variables=template_variables,
    verbose=True,
)

In [19]:
# Announcement text containing exactly one date, written in a different format
# ("April 15, 2024") from the one requested from the agent.
announcement = (
    "We are excited to announce that our annual company retreat will be held on April 15, 2024. "
    "This event will be a great opportunity for team building and strategic planning."
)
output = agent.invoke({"prompt": announcement})

[1m[38;2;50;164;103mFinal Answer[0m[1m[0m: 2024-04-15


In [20]:
# Display the final answer stored on the agent's output object.
print(output.final_answer)

2024-04-15


In [25]:
# Exact-match evaluation against a date object built from the expected ISO string.
expected_date = datetime.strptime("2024-04-15", "%Y-%m-%d").date()
print(output.final_answer == expected_date)

True


**Array**

In [22]:
# Agent configured for OutputType.ARRAY_INTEGER. Unlike the earlier sections, the
# system prompt carries a task instruction (number extraction) after a blank line.
system_prompt = (
    "You are an AI assistant designed to help users with a variety of tasks.\n"
    "\n"
    "Extract all numbers from the user's input text."
)
prompt_template = "{prompt}"
template_variables = ["prompt"]

agent = Agent.create(
    llm=llm,
    prompting_strategy=PromptingStrategy.SINGLE_COMPLETION,
    output_type=OutputType.ARRAY_INTEGER,
    system_prompt=system_prompt,
    prompt=prompt_template,
    prompt_variables=template_variables,
    verbose=True,
)

In [None]:
# Hike report used as the extraction input. It mixes a spelled-out number ("six"),
# a thousands separator ("4,401") and repeated values; paragraphs are separated by
# one blank line.
hike_paragraphs = [
    "Last weekend, six of us went on a 15-kilometer hike, starting at 7 AM.",
    "By noon, we had covered 10 kilometers and reached Mount Elbert's 4,401-meter summit by 2 PM, with a temperature of 12°C.",
    "We camped 5 kilometers away by 6 PM with 12 others and returned home by 5 PM the next day.",
]
prompt = "\n\n".join(hike_paragraphs)

output = agent.invoke({"prompt": prompt})

In [None]:
# Display the final answer stored on the agent's output object.
print(output.final_answer)

In [None]:
# Order-sensitive exact match: the expected list follows the order in which the
# numbers occur in the hike text, duplicates included.
expected_numbers = [6, 15, 7, 10, 4401, 2, 12, 5, 6, 12, 5]
print(output.final_answer == expected_numbers)

**Struct/Object**

In [23]:
system_prompt = "You are an AI assistant designed to help users with a variety of tasks."

# NOTE(review): this cell sits under the "Struct/Object" heading but requests
# OutputType.BOOLEAN and passes no output_schema. It looks like a copy of the
# Boolean cell that has not been adapted yet. TODO confirm the intended output
# type and schema before relying on this agent.
agent = Agent.create(
    llm=llm,
    system_prompt=system_prompt,
    prompt="{prompt}",
    prompt_variables=["prompt"],
    output_type=OutputType.BOOLEAN,
    prompting_strategy=PromptingStrategy.SINGLE_COMPLETION,
    verbose=True,
)