In [None]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)
import pandas as pd
from langchain.docstore.document import Document
from langchain_community.llms import Ollama

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
llm_ollama = Ollama(
    base_url="http://localhost:11434",
    model="llama3.1:latest"
)


In [5]:
llm_ollama

Ollama(model='llama3.1:latest')

In [None]:
# Defining the prompt template
template = """Answer the question using only the provided context. Be concise and provide estimates when requested. If the context is insufficient, state that you lack enough information:
{context}

Question: {question}
"""
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

In [None]:
# Loading dataset
dataframe = pd.read_csv("insurance.csv")

In [None]:
# PRinting columns of the dataset
print(dataframe.columns)


Index(['category', 'subCategory', 'question', 'answer'], dtype='object')


In [None]:
# Preparing documents for FAISS 
documents = []
for index, row in dataframe.iterrows():
    documents.append(Document(page_content=row["answer"], metadata={"category": row["category"], "subCategory": row["subCategory"]}))

In [None]:
# Generating embeddings
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)

  embeddings = OpenAIEmbeddings()


In [None]:
# Defining the retriever
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})  # Top 3 relevant contexts



In [None]:
# Generating answers using the model
data = {"question": [], "ground_truth": [], "answer_ollama": [], "contexts": []}
for index, row in dataframe.iterrows():
    query = row["question"]  # Column name for questions
    ground_truth = row["answer"]  # Column name for ground truth answers

    data["question"].append(query)
    data["ground_truth"].append(ground_truth)

    # Retrieve relevant documents
    retrieved_docs = retriever.get_relevant_documents(query)
    context = " ".join([doc.page_content for doc in retrieved_docs])
    data["contexts"].append([doc.page_content for doc in retrieved_docs])

    # Run pipeline with ollama
    ollama_output = llm_ollama(prompt.format(context=context, question=query))
    data["answer_ollama"].append(ollama_output)


  retrieved_docs = retriever.get_relevant_documents(query)
  ollama_output = llm_ollama(prompt.format(context=context, question=query))


In [13]:
data["reference"] = [" ".join(contexts) for contexts in data["contexts"]]  # Combine list into a single string

In [None]:
# Converting data to a Dataset
dataset = Dataset.from_dict(data)

In [None]:
# Renaming the ground_truth column to response for compatibility with RAGAs
dataset = dataset.rename_column("ground_truth", "response")


In [None]:
# Evaluating the RAG QnA with RAGAs
result = evaluate(
    dataset=dataset,
    metrics=[
        answer_relevancy,
        faithfulness,
        context_precision,
        context_recall,
    ],
)


Evaluating: 100%|██████████| 428/428 [03:29<00:00,  2.04it/s]


In [None]:
# Displaying the metrics
result

{'answer_relevancy': 0.8449, 'faithfulness': 0.9168, 'context_precision': 0.9665, 'context_recall': 0.9677}

In [None]:
# Displaying the first row of stored data
print({key: value[0] for key, value in data.items()})

{'question': 'What is the average life insurance cost per month?', 'ground_truth': 'The cost of a life insurance policy depends on the type of policy you own. Term plans are generally cheaper in nature as it only offers death benefit with no profits or returns. Traditional plans and unit-linked plans tend to cost more as they offer a wide range of benefits. The cost also depends on the sum assured i.e. a higher sum assured will cost you more and vice versa.', 'answer_ollama': 'Unfortunately, I must inform you that there isn\'t an explicit "average" mentioned in the context. However, we can make an educated estimate based on the provided information.\n\nAssuming the given example of a 26-year-old male applicant with an annual salary of Rs.7 lakh and sum assured of Rs.1 crore is somewhat representative of an average premium price, let\'s try to calculate the estimated average life insurance cost per month for a different set of applicants.\n\nLet\'s consider a few examples:\n\n* For a 30

In [None]:
# Exporting data to CSV
output_df = pd.DataFrame(data)
output_df.to_csv("genans1.csv", index=False)

In [20]:
print(f"Ollama Output: {ollama_output}")


Ollama Output: To avoid paying late fees in life insurance, consider the following options:

1. **Select auto-debit option**: This way, you'll never miss out on premium payments, and your account will be automatically debited on the due date.
2. **Select reminder option before premium payment date**: Set reminders a few days or weeks before the payment is due to ensure you have enough time to make the payment.
3. **Keep track of premium payment reminders**: Regularly check your email or mobile notifications for reminders, and make payments promptly.
4. **Opt for yearly premium payment instead of monthly payments**: This approach can help you avoid missing out on regular payments and reduce the risk of late fees.

By implementing these strategies, you can minimize the likelihood of incurring late fees on your life insurance premiums.
