In [None]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)
import pandas as pd
from langchain.docstore.document import Document

In [None]:
import os
# Set API Key
os.environ["OPENAI_API_KEY"] = ""
os.environ["HUGGINGFACEHUB_API_TOKEN"] = ""

In [None]:
# LLM
llm_gpt = OpenAI(temperature=0.5, max_tokens=1024)


In [None]:
# Prompt template
template = """Answer the question using only the provided context. Be concise and provide estimates when requested. If the context is insufficient, state that you lack enough information:
{context}

Question: {question}
"""
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

In [None]:
dataframe = pd.read_csv("insurance.csv")

  dataframe = pd.read_csv("D:\LLMs\FProject_2\data\insurance.csv")


In [53]:
print(dataframe.columns)


Index(['category', 'subCategory', 'question', 'answer'], dtype='object')


In [None]:
# Preparing documents for FAISS
documents = []
for index, row in dataframe.iterrows():
    documents.append(Document(page_content=row["answer"], metadata={"category": row["category"], "subCategory": row["subCategory"]}))

In [None]:
# Generating embeddings
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)

In [None]:
# Retriever
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})  # Top 3 relevant contexts



In [None]:
# Generating answers using the model
data = {"question": [], "ground_truth": [], "answer_gpt": [], "contexts": []}
for index, row in dataframe.iterrows():
    query = row["question"]  # Column name for questions
    ground_truth = row["answer"]  # Column name for ground truth answers

    data["question"].append(query)
    data["ground_truth"].append(ground_truth)

    # Retrieve relevant documents
    retrieved_docs = retriever.get_relevant_documents(query)
    context = " ".join([doc.page_content for doc in retrieved_docs])
    data["contexts"].append([doc.page_content for doc in retrieved_docs])

    # Run pipeline with GPT
    gpt_output = llm_gpt(prompt.format(context=context, question=query))
    data["answer_gpt"].append(gpt_output)


In [58]:
data["reference"] = [" ".join(contexts) for contexts in data["contexts"]]  # Combine list into a single string

In [None]:
# Converting data to a Dataset
dataset = Dataset.from_dict(data)

In [None]:
# Renaming the ground_truth column to response for compatibility with RAGAs
dataset = dataset.rename_column("ground_truth", "response")


In [None]:
# Evaluation with RAGAs
result = evaluate(
    dataset=dataset,
    metrics=[
        answer_relevancy,
        faithfulness,
        context_precision,
        context_recall,
    ],
)


Evaluating: 100%|██████████| 428/428 [04:04<00:00,  1.75it/s]


In [70]:
print("The evaluation result for the RAG system using OpenAI GPT:")
result

The evaluation result for the RAG system using OpenAI GPT:


{'answer_relevancy': 0.8277, 'faithfulness': 0.9147, 'context_precision': 0.9688, 'context_recall': 0.9695}

In [63]:
# Display the first row of stored data
print({key: value[0] for key, value in data.items()})

{'question': 'What is the average life insurance cost per month?', 'ground_truth': 'The cost of a life insurance policy depends on the type of policy you own. Term plans are generally cheaper in nature as it only offers death benefit with no profits or returns. Traditional plans and unit-linked plans tend to cost more as they offer a wide range of benefits. The cost also depends on the sum assured i.e. a higher sum assured will cost you more and vice versa.', 'answer_gpt': '\nThe average life insurance cost per month can vary greatly depending on various factors such as age, gender, income, smoking habits, type of policy, and sum assured. However, for a 26-year-old male applicant who smokes with an annual salary of Rs.7 lakh and a sum assured of Rs.1 crore, the average premium price for a term insurance plan is Rs.933 per month. It is important to note that the cost of a life insurance policy can increase with age and a higher sum assured will also result in a higher premium.', 'contex

In [None]:
# Export data to CSV
output_df = pd.DataFrame(data)
output_df.to_csv("ins100_gen.csv", index=False)

In [48]:
print(f"GPT Output: {gpt_output}")


GPT Output: 
You can avoid paying late fees in life insurance by selecting the auto-debit option, setting reminders before the premium payment date, keeping track of premium payment reminders, opting for yearly premium payments, and being aware of the exclusions and grace period of your policy.
