In [None]:
# %pip install llama-index-embeddings-openai
!pip install llama-index-readers-file
%pip install llama-index-embeddings-huggingface
# %pip install llama-index-embeddings-instructor
!pip install llama-index


In [None]:
from llama_index.core import SimpleDirectoryReader

# load documents
# Read the single source text file through SimpleDirectoryReader. The path is
# relative, so it is resolved against the current working directory.
documents = SimpleDirectoryReader(input_files=["art_of_happiness.txt"]).load_data()

In [None]:
from llama_index.core.node_parser import (
    SentenceSplitter,
)

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding


# Embedding model shared by the notebook: it embeds the nodes, is installed as
# the global Settings.embed_model, and backs the similarity-based scores.
# embed_batch_size=1 is passed to the wrapper (presumably to limit memory use — confirm).
# NOTE(review): trust_remote_code=True opts in to running code shipped with the
# model repository; confirm the repository is trusted before running.
embed_model = HuggingFaceEmbedding(model_name="Alibaba-NLP/gte-large-en-v1.5", trust_remote_code=True, embed_batch_size=1)

In [None]:
# Split the loaded documents into nodes with a SentenceSplitter configured with
# chunk_size=500 and chunk_overlap=100 (units are defined by the splitter —
# presumably tokens; confirm against the LlamaIndex documentation).
base_splitter = SentenceSplitter(chunk_size=500, chunk_overlap=100)
base_nodes = base_splitter.get_nodes_from_documents(documents)


In [None]:
# Display the first node to sanity-check the output of the splitter.
base_nodes[0]

In [None]:
from tqdm import tqdm

# Compute an embedding for every node up front and store it on the node itself,
# with a tqdm progress bar. Only node.text is passed to the embedding model.
# NOTE(review): presumably the index built later reuses these stored embeddings
# instead of recomputing them — confirm.
for node in tqdm(base_nodes):
  node.embedding = embed_model.get_text_embedding(node.text)


In [None]:
# Number of nodes produced by the splitter.
len(base_nodes)

In [None]:
!pip install matplotlib

In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.core.response.notebook_utils import display_source_node
from llama_index.core import Settings

# global default
# Register the embedding model created earlier on the global LlamaIndex Settings
# object so that components built afterwards pick it up by default.
Settings.embed_model = embed_model 

from tqdm import tqdm 

# Build the vector index over the nodes and create a default retriever that
# returns the 5 most similar nodes. Note that generate_answer() creates its own
# retriever from base_vector_index with a caller-chosen similarity_top_k, so it
# does not use this `retriever` object.
base_vector_index = VectorStoreIndex(base_nodes)
retriever = base_vector_index.as_retriever(similarity_top_k=5)



In [None]:
!pip install accelerate


In [None]:
# Load the Phi-3 chat model and wrap it in a text-generation pipeline (`pipe`),
# together with the keyword arguments (`generation_args`) used for every call.
import torch
# Release cached GPU memory held by PyTorch before loading the model
# (presumably to maximise free memory when this cell is re-run — confirm).
torch.cuda.empty_cache()

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Seed the torch random number generator for reproducibility.
torch.random.manual_seed(0)

# device_map="cuda" places the model on a CUDA device, so this cell requires a GPU.
# NOTE(review): trust_remote_code=True opts in to running code shipped with the
# model repository; confirm the repository is trusted before running.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct", 
    device_map="cuda", 
    torch_dtype="auto", 
    trust_remote_code=True, 
low_cpu_mem_usage=True

)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")


pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Keyword arguments forwarded to `pipe` on every call in generate_answer().
# Sampling is disabled (do_sample=False), at most 500 new tokens are requested,
# and return_full_text=False is set (presumably so that 'generated_text' holds
# only the newly generated answer rather than prompt + answer — confirm).
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "temperature": 0,
    "do_sample": False,
}


In [None]:

def generate_answer(question, num_of_context):
    """Answer ``question`` with the LLM, grounded in retrieved context chunks.

    Args:
        question: The question text; used as the retrieval query and inserted
            into the prompt.
        num_of_context: Number of nodes to retrieve (passed as
            ``similarity_top_k`` to the retriever).

    Returns:
        A ``(answer, context)`` tuple: the text generated by ``pipe`` and the
        context string that was inserted into the prompt.
    """
    retriever = base_vector_index.as_retriever(similarity_top_k=num_of_context)
    retrieved_nodes = retriever.retrieve(question)

    # Separate the chunks with a blank line. Concatenating them back-to-back
    # fuses the last word of one chunk with the first word of the next, which
    # corrupts both the prompt and the context returned for scoring.
    context = "\n\n".join(
        retrieved_node.get_content() for retrieved_node in retrieved_nodes
    )

    prompt = f"""
    
    You are a spiritual leader. Your students will seek your guidance.
    Answer their questions based solely on the provided context. 
    If the context does not contain the information needed to answer a question, respond with "I don't know."
    Context: {context}
    Question: {question}
    
    """

    # Single-turn chat: the whole prompt is sent as one user message.
    messages = [
        {"role": "user", "content": prompt},
    ]
    output = pipe(messages, **generation_args)
    answer = output[0]['generated_text']

    return (answer, context)


In [None]:
from sklearn.metrics.pairwise import cosine_similarity



def calculate_faithfulness(context, answer):
    """Score how close ``answer`` is to ``context`` in embedding space.

    Both texts are embedded with ``embed_model`` (context first, then answer)
    and the cosine similarity of the two vectors is returned as a scalar.
    """
    context_vec, answer_vec = (
        embed_model.get_text_embedding(text) for text in (context, answer)
    )
    # cosine_similarity works on 2-D inputs and returns a 1x1 matrix here.
    score_matrix = cosine_similarity([context_vec], [answer_vec])
    return score_matrix[0][0]

def calculate_relevancy(question, answer):
    """Score how close ``answer`` is to ``question`` in embedding space.

    Both texts are embedded with ``embed_model`` (question first, then answer)
    and the cosine similarity of the two vectors is returned as a scalar.
    """
    question_vec, answer_vec = (
        embed_model.get_text_embedding(text) for text in (question, answer)
    )
    # cosine_similarity works on 2-D inputs and returns a 1x1 matrix here.
    score_matrix = cosine_similarity([question_vec], [answer_vec])
    return score_matrix[0][0]


# Smoke test: answer one question using 3 retrieved context chunks, then score
# the answer against the context (faithfulness) and the question (relevancy).
question = "How to be happy?"

# generate_answer returns an (answer, context) tuple.
output = generate_answer(question,3)
answer, context = output

faithfulness_score = calculate_faithfulness(context, answer)
relevancy_score = calculate_relevancy(question, answer)

print(f"Faithfulness Score: {faithfulness_score}")
print(f"Relevancy Score: {relevancy_score}")
print(answer)

In [None]:
from pathlib import Path 

# Read the evaluation questions, one per line.
content = Path("generated_questions.txt").read_text(encoding="utf-8")

# Strip surrounding whitespace and drop blank / whitespace-only lines (e.g.
# spacer lines between questions) so the evaluation loop never sends an empty
# question to the retriever and the model.
generated_questions = [line.strip() for line in content.splitlines() if line.strip()]


In [None]:
# Number of evaluation questions loaded from the file.
len(generated_questions)

In [None]:
import pandas as pd
from tqdm import tqdm
import time
import warnings


# Evaluate every question with 2, 3, 4 and 5 retrieved context chunks and
# record the answer, both similarity scores and the time taken to answer.
results = []

# Suppress warnings only for the duration of the evaluation; the previous
# filter state is restored afterwards, so later cells still surface warnings.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')

    for question in tqdm(generated_questions, desc="Evaluating"):
        for i in range(2, 6):
            # perf_counter is monotonic, so the measured duration cannot be
            # skewed by system clock adjustments. The timing covers retrieval
            # plus generation (everything inside generate_answer), not scoring.
            start = time.perf_counter()
            answer, context = generate_answer(question, i)
            response_time = time.perf_counter() - start

            faithfulness_score = calculate_faithfulness(context, answer)
            relevancy_score = calculate_relevancy(question, answer)

            results.append({
                'number of context': i,
                'question': question,
                'answer': answer,
                'faithfulness_score': faithfulness_score,
                'relevancy_score': relevancy_score,
                'response_time': response_time
            })

# Create a DataFrame
df = pd.DataFrame(results)

# Save the DataFrame to a CSV file
df.to_csv('phi3_128k_results.csv', index=False)


In [None]:
# Display the per-question evaluation results (one row per question and context count).
df

In [None]:
# Summarise each metric (mean / max / min) per number of retrieved context
# chunks, then turn the group key back into a regular column.
aggregated_results = (
    df.groupby('number of context')
    .agg({
        metric: ['mean', 'max', 'min']
        for metric in ('faithfulness_score', 'relevancy_score', 'response_time')
    })
    .reset_index()
)

In [None]:
# Display the aggregated statistics per number of context chunks.
aggregated_results