In [1]:
import json
# Read the exported sample file as UTF-8 text, parse it as JSON,
# and preview the first record as the cell output.
with open("Results/ragas_sample_v2.json", encoding="utf-8") as f:
    data = json.loads(f.read())
data[0]


{'user_input': 'Describe gold',
 'retrieved_contexts': ['Gold is a chemical element; it has chemical symbol Au and atomic number 79. In its pure form, it is a bright, slightly orange-yellow, dense, soft, malleable, and ductile metal. Chemically, gold is a transition metal, a group 11 element, and one of the noble metals. It is one of the least reactive chemical elements, being the second-lowest in the reactivity series. It is solid under standard conditions.'],
 'reference_contexts': None,
 'response': "Is there anything else you'd like to know about gold?",
 'multi_responses': None,
 'reference': None,
 'rubrics': None}

In [2]:
from ragas import EvaluationDataset
# Build a ragas EvaluationDataset from the list of records loaded from JSON above.
ragas_dataset = EvaluationDataset.from_list(data)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from ragas import SingleTurnSample
from ragas.metrics import LLMContextPrecisionWithoutReference
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
from ragas.llms import LangchainLLMWrapper

# Models
# NOTE(review): `embeddings` is created here but is not passed to the metric built below.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Local Ollama model, wrapped so it can be handed to ragas metrics as `llm`.
# NOTE(review): timeout=60000 — confirm the unit the Ollama client expects for this value.
local_llm = Ollama(model="mistral", temperature=0, timeout=60000)
wrapped_llm = LangchainLLMWrapper(local_llm)

# Metric: context precision computed without a reference answer, using the wrapped LLM.
context_precision = LLMContextPrecisionWithoutReference(llm=wrapped_llm)

# Mock dataset (could equally come from a JSON file or a DataFrame).
# Each record carries the three keys read by eval_context_precision:
# "user_input", "response" and "retrieved_contexts".
dataset = [
    {
        "user_input": "Describe gold",
        "response": "Is there anything else you'd like to know about gold?",
        "retrieved_contexts": ["Gold is a chemical element with symbol Au and atomic number 79..."]
    },
    {
        "user_input": "What is copper used for?",
        "response": "Copper is widely used in electronics and construction.",
        "retrieved_contexts": ["Copper is used in wires, pipes, electronics, and motors..."]
    }
]

# Evaluar uno por uno
import asyncio

async def eval_context_precision(data):
    """Score every record with the module-level ``context_precision`` metric.

    Records are handled strictly one at a time, in input order: each record is
    turned into a ``SingleTurnSample`` from its ``user_input``, ``response``
    and ``retrieved_contexts`` keys, and its score is awaited before the next
    record is touched.

    Args:
        data: Iterable of dict-like records exposing the three keys above.

    Returns:
        A list holding one score per record, in the same order as ``data``.
    """
    return [
        await context_precision.single_turn_ascore(
            SingleTurnSample(
                user_input=record["user_input"],
                response=record["response"],
                retrieved_contexts=record["retrieved_contexts"],
            )
        )
        for record in data
    ]

# Run the coroutine to completion and collect one score per record in `dataset`.
# NOTE(review): other cells in this notebook use top-level `await`; asyncio.run()
# raises RuntimeError when an event loop is already running in the current thread —
# confirm this works in the target kernel (or use `await eval_context_precision(dataset)`).
scores = asyncio.run(eval_context_precision(dataset))

# Print the results (they could be saved instead), formatted to four decimals.
for i, score in enumerate(scores):
    print(f"Sample {i} - Context Precision: {score:.4f}")


In [None]:
# Rebinds `data` (previously loaded from the JSON file in the first cell) to a single
# hand-written record. Unlike the record displayed in the first cell, 'reference' is
# filled in here, with the same text as the retrieved context.
data=[{'user_input': 'Describe gold',
 'retrieved_contexts': ['Gold is a chemical element; it has chemical symbol Au and atomic number 79. In its pure form, it is a bright, slightly orange-yellow, dense, soft, malleable, and ductile metal. Chemically, gold is a transition metal, a group 11 element, and one of the noble metals. It is one of the least reactive chemical elements, being the second-lowest in the reactivity series. It is solid under standard conditions.'],
 'reference_contexts': None,
 'response': "Is there anything else you'd like to know about gold?",
 'multi_responses': None,
 'reference': 'Gold is a chemical element; it has chemical symbol Au and atomic number 79. In its pure form, it is a bright, slightly orange-yellow, dense, soft, malleable, and ductile metal. Chemically, gold is a transition metal, a group 11 element, and one of the noble metals. It is one of the least reactive chemical elements, being the second-lowest in the reactivity series. It is solid under standard conditions.',
 'rubrics': None}]

In [None]:
# Single-turn sample whose response adds a claim ("cures cancer") that does not
# appear in the retrieved context; the reference is the longer description of gold.
sample = SingleTurnSample(
    user_input='Describe gold',
    response= "Gold is used in jewelry and also cures cancer.",
    retrieved_contexts=["Gold is a yellow metal used in electronics and jewelry."],
    reference= 'Gold is a chemical element; it has chemical symbol Au and atomic number 79. In its pure form, it is a bright, slightly orange-yellow, dense, soft, malleable, and ductile metal. Chemically, gold is a transition metal, a group 11 element, and one of the noble metals. It is one of the least reactive chemical elements, being the second-lowest in the reactivity series. It is solid under standard conditions.'
)

### Context precision

In [None]:
from ragas import SingleTurnSample
from ragas.metrics import LLMContextPrecisionWithoutReference,LLMContextPrecisionWithReference
from ragas.llms import LangchainLLMWrapper
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama


# Sentence-embedding model; it is not passed to the metric built in this cell.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Wrap the local Ollama model so it can be passed to the ragas metric as `llm`.
# NOTE(review): timeout=60000 — confirm the unit the Ollama client expects for this value.
local_llm = Ollama(model="mistral", temperature=0,timeout=60000)
wrapped_llm = LangchainLLMWrapper(local_llm)
context_precision = LLMContextPrecisionWithoutReference(llm=wrapped_llm)



# Score `sample` (the SingleTurnSample built in an earlier cell) using top-level await;
# the awaited value is the cell's output.
await context_precision.single_turn_ascore(sample)


0.0

### Faithfulness

In [None]:
from ragas.metrics import Faithfulness
# Build the Faithfulness metric on the wrapped local LLM and score `sample`
# (defined in an earlier cell); the awaited value is the cell's output.
scorer = Faithfulness(llm=wrapped_llm)
await scorer.single_turn_ascore(sample)

0.5

### Answer relevancy

In [None]:
from ragas.metrics import ResponseRelevancy
from ragas.embeddings import LangchainEmbeddingsWrapper

# Instantiate the all-MiniLM-L6-v2 sentence-embedding model (same model name as in
# earlier cells) and wrap it so it can be passed to ragas as `embeddings`.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
ragas_embeddings = LangchainEmbeddingsWrapper(embeddings)
# ResponseRelevancy receives both the wrapped LLM and the wrapped embeddings.
# Rebinds `scorer`, then scores `sample`; the awaited value is the cell's output.
scorer = ResponseRelevancy(llm=wrapped_llm, embeddings=ragas_embeddings)
await scorer.single_turn_ascore(sample)

np.float64(0.0)

In [None]:
from ragas import SingleTurnSample

# Sample whose response is word-for-word the retrieved context.
sample2 = SingleTurnSample(
    user_input='Describe gold',
    response= "Gold is a yellow metal used in electronics and jewelry.",
    retrieved_contexts=["Gold is a yellow metal used in electronics and jewelry."],
    reference= 'Gold is a chemical element; it has chemical symbol Au and atomic number 79. In its pure form, it is a bright, slightly orange-yellow, dense, soft, malleable, and ductile metal. Chemically, gold is a transition metal, a group 11 element, and one of the noble metals. It is one of the least reactive chemical elements, being the second-lowest in the reactivity series. It is solid under standard conditions.'
)
# Column order of the tuple below: ContextPrecision, Faithfulness, AnswerRelevancy, Recall.
# These look like hand-recorded scores for sample2 (TODO confirm). The bare tuple is an
# expression statement that is not the cell's last expression, so its value is discarded.
1,1,0.64,1

# Sample whose response adds a claim ("cures cancer") absent from the retrieved context.
# Rebinds `sample` with the same arguments used for `sample` in an earlier cell.
sample = SingleTurnSample(
    user_input='Describe gold',
    response= "Gold is used in jewelry and also cures cancer.",
    retrieved_contexts=["Gold is a yellow metal used in electronics and jewelry."],
    reference= 'Gold is a chemical element; it has chemical symbol Au and atomic number 79. In its pure form, it is a bright, slightly orange-yellow, dense, soft, malleable, and ductile metal. Chemically, gold is a transition metal, a group 11 element, and one of the noble metals. It is one of the least reactive chemical elements, being the second-lowest in the reactivity series. It is solid under standard conditions.'
)
# Same column order: ContextPrecision, Faithfulness, AnswerRelevancy, Recall.
# As the last expression in the cell, this tuple is what the cell displays as output.
0,0.5,0,0.2

  from .autonotebook import tqdm as notebook_tqdm


(0, 0.5, 0, 0.2)

In [None]:
# --- Imports
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.metrics import (
    LLMContextPrecisionWithoutReference,
    Faithfulness,
    ResponseRelevancy,
)

from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
import asyncio

# Local Ollama model, wrapped so it can be passed to ragas metrics as `llm`.
# NOTE(review): timeout=60000 — confirm the unit the Ollama client expects for this value.
local_llm = Ollama(model="mistral", temperature=0, timeout=60000)
wrapped_llm = LangchainLLMWrapper(local_llm)

# Sentence-embedding model, wrapped so it can be passed to ragas metrics as `embeddings`.
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
ragas_embeddings = LangchainEmbeddingsWrapper(hf_embeddings)

# --- Metrics
# Maps a result label to a configured ragas metric; `evaluate` iterates over this dict.
# Only answer_relevancy is given the embeddings; the other two receive the LLM alone.
metrics = {
    "context_precision_no_ref": LLMContextPrecisionWithoutReference(llm=wrapped_llm),
    "faithfulness": Faithfulness(llm=wrapped_llm),
    "answer_relevancy": ResponseRelevancy(llm=wrapped_llm, embeddings=ragas_embeddings),
}

# --- Asynchronous evaluation function
async def evaluate(sample):
    """Score ``sample`` with every metric in the module-level ``metrics`` dict.

    All metrics are awaited together through ``asyncio.gather``, so they run
    concurrently and every coroutine that gets created is also scheduled. The
    previous implementation built all coroutines up front but awaited them one
    after another, which left the later ones un-awaited whenever an earlier
    metric raised.

    Args:
        sample: The sample handed unchanged to each metric's
            ``single_turn_ascore``.

    Returns:
        dict mapping each metric label to its score, in the iteration order
        of ``metrics``.

    Raises:
        The first exception raised by any metric propagates to the caller.
    """
    # Snapshot the labels once so results are paired with the right keys
    # even though the scores are computed concurrently.
    names = list(metrics)
    values = await asyncio.gather(
        *(metrics[name].single_turn_ascore(sample) for name in names)
    )
    return dict(zip(names, values))

# --- Run and display
# NOTE(review): other cells in this notebook use top-level `await`; asyncio.run()
# raises RuntimeError when an event loop is already running in the current thread —
# confirm this works in the target kernel (or use `await evaluate(sample2)`).
scores = asyncio.run(evaluate(sample2))
print("Scores:", scores)


  local_llm = Ollama(model="mistral", temperature=0, timeout=60000)
  hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


Scores: {'context_precision_no_ref': 0.9999999999, 'faithfulness': 1.0, 'answer_relevancy': np.float64(0.6405184043151523)}
