In [4]:
# attach to the existing event loop when using jupyter notebooks
import nest_asyncio
nest_asyncio.apply()

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings

from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from datasets import Dataset



In [5]:
# Define the LLM model and tokenizer
#model_name = "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA"
model_name = "sapienzanlp/Minerva-3B-base-v1.0"
model_name = "microsoft/Phi-3-mini-128k-instruct"
#model_name = "meta-llama/Llama-2-7b-hf"

#tokenizer = AutoTokenizer.from_pretrained(model_name)
#model = AutoModelForCausalLM.from_pretrained(model_name)
# Create the pipeline
#pipe = pipeline(
##    model=model,
#    tokenizer=tokenizer,
#    return_full_text=True,
#    task='text-generation',
#    temperature=0.1, 
#    repetition_penalty=1.1
#)
pipe = pipeline("text-generation", model=model_name, trust_remote_code=True)



`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   1%|          | 41.9M/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [6]:

# Create the HuggingFacePipeline for LangChain
langchain_llm = HuggingFacePipeline(pipeline=pipe)

# Embedding model
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")

In [None]:

# Wrap the LLM and embeddings with the RAGAS wrappers
langchain_llm = LangchainLLMWrapper(langchain_llm)
langchain_embeddings = LangchainEmbeddingsWrapper(embedding_model)

# Example data samples
data_samples = {
    'question': ['When was the first super bowl?', 'Who won the most super bowls?'],
    'answer': ['The first superbowl was held on Jan 15, 1967', 'The most super bowls have been won by The New England Patriots'],
    'contexts': [['The First AFL–NFL World Championship Game was an American football game played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles,'],
                 ['The Green Bay Packers...Green Bay, Wisconsin.', 'The Packers compete...Football Conference']],
    'ground_truth': ['The first superbowl was held on January 15, 1967', 'The New England Patriots have won the Super Bowl a record six times']
}

# Create a dataset from the data samples
dataset = Dataset.from_dict(data_samples)


In [None]:

# Run evaluation
result = evaluate(
    dataset=dataset,
    llm=langchain_llm,
    embeddings=langchain_embeddings,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    ],
    raise_exceptions=True, 
    is_async = True  # Ensure exceptions are raised directly
)

# Convert the evaluation result to a pandas DataFrame and print it
evaluation_df = result.to_pandas()
print("Evaluation Results:\n", evaluation_df)
