# Análisis de los diferentes modelos

In [None]:
from datasets import Dataset
import os
from ragas import evaluate
from ragas.metrics import faithfulness, answer_correctness
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer

: 

In [None]:
# Toy evaluation set, written one record per sample for readability.
_sample_rows = [
    {
        'question': 'When was the first super bowl?',
        'answer': 'The first superbowl was held on Jan 15, 1967',
        'contexts': [
            'The First AFL–NFL World Championship Game was an American football game played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles,',
        ],
        'ground_truth': 'The first superbowl was held on January 15, 1967',
    },
    {
        'question': 'Who won the most super bowls?',
        'answer': 'The most super bowls have been won by The New England Patriots',
        'contexts': [
            'The Green Bay Packers...Green Bay, Wisconsin.',
            'The Packers compete...Football Conference',
        ],
        'ground_truth': 'The New England Patriots have won the Super Bowl a record six times',
    },
]

# Dataset.from_dict takes a column-oriented mapping (one list per field,
# aligned by sample index), so transpose the records into columns.
data_samples = {
    field: [row[field] for row in _sample_rows]
    for field in ('question', 'answer', 'contexts', 'ground_truth')
}

dataset = Dataset.from_dict(data_samples)

: 

In [None]:
# Hugging Face Hub id of the checkpoint under evaluation; later cells reuse
# this name when loading the matching tokenizer and generation config.
model_name = "meta-llama/Meta-Llama-3-8B"

# device_map="auto" leaves the placement of the weights on the available
# devices to the library instead of pinning the model to one device here.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
)

: 

In [None]:
# Tokenizer for the same checkpoint as the model, explicitly requesting the
# fast tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=True,
)

: 

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline


# Start from the generation settings published with the checkpoint and
# override only what the evaluation needs.
generation_config = GenerationConfig.from_pretrained(model_name)
generation_config.max_new_tokens = 1024
generation_config.repetition_penalty = 1.15

# Use greedy decoding rather than sampling at a near-zero temperature.
# Dividing the logits by a tiny temperature can overflow in reduced-precision
# dtypes, and sampled outputs are not reproducible from run to run, which is
# undesirable when the model is used to score answers.
generation_config.do_sample = False

# Reset the sampling-only settings to their neutral values so the config does
# not carry sampling parameters while sampling is switched off.
generation_config.temperature = 1.0
generation_config.top_p = 1.0

# Text-generation pipeline around the already-loaded model and tokenizer.
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=generation_config,
)

: 

In [None]:
# Import from langchain_community, the package this notebook already uses for
# HuggingFaceEmbeddings; the top-level `langchain` re-export of this class is
# deprecated.
from langchain_community.llms import HuggingFacePipeline

# Wrap the transformers pipeline as a LangChain LLM so it can be handed to
# ragas as the judge model.
# NOTE(review): with a pre-built pipeline, model_kwargs looks like metadata
# only and does not appear to change decoding -- confirm against the
# HuggingFacePipeline implementation in the installed version.
llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})

: 

In [None]:
# Use a dedicated sentence-embedding model for the embeddings.
# The causal-LM checkpoint in `model_name` is not a sentence-transformers
# model: passing it here would load a second full copy of the LLM weights.
# NOTE(review): its tokenizer also appears to define no padding token, which
# the sentence-transformers encoding path needs -- confirm.
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

: 

In [None]:
# Options for the ragas run: which metrics to compute, plus the LLM and the
# embeddings objects built in the previous cells.
_evaluation_options = {
    "metrics": [faithfulness, answer_correctness],
    "llm": llm,
    "embeddings": embeddings,
}

score = evaluate(dataset, **_evaluation_options)

# Convert the result to a pandas DataFrame and print it.
df_score = score.to_pandas()
print(df_score)

: 