In [None]:
%pip install llama-index-llms-openai
!pip install llama-index


In [13]:
import os
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

# Both models are reached through the same Ollama endpoint.
OLLAMA_BASE_URL = "http://localhost:11434"

# Embedding model
ollama_embedding = OllamaEmbedding(
    model_name="nomic-embed-text:latest",
    base_url=OLLAMA_BASE_URL,
    ollama_additional_kwargs={"mirostat": 0},
)

# Chat / completion model
ollama_llm = Ollama(
    model="llama3.2:latest",
    base_url=OLLAMA_BASE_URL,
    temperature=0.4,
)

# Register both models on the global llama-index Settings object.
Settings.llm = ollama_llm
Settings.embed_model = ollama_embedding

In [14]:
from llama_index.core.evaluation import SemanticSimilarityEvaluator

# Build the evaluator with every constructor argument left at its default.
# NOTE(review): with no `embed_model` argument the evaluator presumably falls back
# to `Settings.embed_model` (the Ollama embedding registered earlier) — confirm
# against the llama-index API reference.
evaluator = SemanticSimilarityEvaluator()

In [15]:
# This evaluator only uses `response` and `reference`, passing in query does not influence the evaluation
# query = 'What is the color of the sky'

response = "The sky is typically blue"
reference = """The color of the sky can vary depending on several factors, including time of day, weather conditions, and location.

During the day, when the sun is in the sky, the sky often appears blue. 
This is because of a phenomenon called Rayleigh scattering, where molecules and particles in the Earth's atmosphere scatter sunlight in all directions, and blue light is scattered more than other colors because it travels as shorter, smaller waves. 
This is why we perceive the sky as blue on a clear day.
"""

result = await evaluator.aevaluate(
    response=response,
    reference=reference,
)

In [None]:
# `passing` is judged against the default similarity threshold of 0.8.
for label, value in (("Score: ", result.score), ("Passing: ", result.passing)):
    print(label, value)

In [17]:
response = "Sorry, I do not have sufficient context to answer this question."
reference = """The color of the sky can vary depending on several factors, including time of day, weather conditions, and location.

During the day, when the sun is in the sky, the sky often appears blue. 
This is because of a phenomenon called Rayleigh scattering, where molecules and particles in the Earth's atmosphere scatter sunlight in all directions, and blue light is scattered more than other colors because it travels as shorter, smaller waves. 
This is why we perceive the sky as blue on a clear day.
"""

result = await evaluator.aevaluate(
    response=response,
    reference=reference,
)

In [None]:
# `passing` is judged against the default similarity threshold of 0.8.
for label, value in (("Score: ", result.score), ("Passing: ", result.passing)):
    print(label, value)

In [None]:
from llama_index.core.evaluation import SemanticSimilarityEvaluator
from llama_index.core.embeddings import resolve_embed_model, BaseEmbedding
from llama_index.core.base.embeddings.base import SimilarityMode




# Customised evaluator: explicit embedding model, explicit similarity mode and
# a pass threshold of 0.6 instead of the default.
SIMILARITY_THRESHOLD = 0.6

evaluator = SemanticSimilarityEvaluator(
    similarity_threshold=SIMILARITY_THRESHOLD,
    similarity_mode=SimilarityMode.DEFAULT,
    embed_model=ollama_embedding,
)

In [30]:
response = "The sky is yellow."
reference = "The sky is blue."

result = await evaluator.aevaluate(
    response=response,
    reference=reference,
)

In [None]:
# This evaluator was built with similarity_threshold=0.6, so `passing` is
# judged against 0.6 here.
for label, value in (("Score: ", result.score), ("Passing: ", result.passing)):
    print(label, value)

We note here that a high score does not imply the answer is always correct.

Embedding similarity primarily captures the notion of "relevancy". Since both the response and the reference discuss "the sky" and a color, their embeddings are close and they score as semantically similar, even though the two statements contradict each other.