In [94]:
!pip install rouge_score nltk absl



ERROR: Could not find a version that satisfies the requirement absl (from versions: none)
ERROR: No matching distribution found for absl


In [None]:
import evaluate
from datasets import Dataset
from ragas import evaluate as ragas_evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall

from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent
from langchain_core.tools import Tool

from startup_chain import get_startup_data_chain
from legal_chain import get_legal_data_chain
from startup_masterclass_chain import get_startup_masterclass_chain

from helper import get_llm, get_embeddings_model, get_startup_data_vector_store_retriever,get_legal_data_vector_store_retriever,get_startup_masterclass_vector_store_retriever

import os
from dotenv import load_dotenv

# Read secrets from a local .env file instead of hardcoding them in the notebook.
# A placeholder assignment to OPENAI_API_KEY used to run before load_dotenv();
# because load_dotenv() leaves already-set variables untouched by default, the
# real key from .env was never picked up.
load_dotenv()
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file or export it before running this notebook."
    )

from langchain.chat_models import ChatOpenAI
from langchain_core.messages import AIMessage, HumanMessage
import pkgutil
import ragas.metrics

# Collect the names of the submodules that live inside the ragas.metrics package.
available_metrics = [
    module_info.name
    for module_info in pkgutil.iter_modules(ragas.metrics.__path__)
]
print("Available RAGAS Metrics:", available_metrics)

Available RAGAS Metrics: ['_answer_correctness', '_answer_relevance', '_answer_similarity', '_aspect_critic', '_bleu_score', '_context_entities_recall', '_context_precision', '_context_recall', '_datacompy_score', '_domain_specific_rubrics', '_factual_correctness', '_faithfulness', '_goal_accuracy', '_instance_specific_rubrics', '_multi_modal_faithfulness', '_multi_modal_relevance', '_noise_sensitivity', '_nv_metrics', '_rouge_score', '_simple_criteria', '_sql_semantic_equivalence', '_string', '_summarization', '_tool_call_accuracy', '_topic_adherence', 'base', 'utils']


In [30]:
# Message list shared by the evaluation runs; evaluate_rag_pipeline appends to it.
chat_history = []

# Chat model (temperature 0) handed to the RAG chains and to the ReAct agent.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, max_tokens=1024)
embedding = get_embeddings_model()

# RAG chains built by the project helpers from the same LLM and embedding model.
startup_data_rag_chain = get_startup_data_chain(llm, embedding)
legal_data_rag_chain = get_legal_data_chain(llm, embedding)
startup_masterclass_rag_chain = get_startup_masterclass_chain(llm, embedding)

# Retrievers that are passed directly to evaluate_rag_pipeline.
retriever_startup_data = get_startup_data_vector_store_retriever(embedding)
retriever_legal_data = get_legal_data_vector_store_retriever(embedding)
retriever_startup_masterclass = get_startup_masterclass_vector_store_retriever(embedding)

# ReAct prompt template fetched from the LangChain hub.
react_docstore_prompt = hub.pull("hwchase17/react")


 [LOG] OK !! get_embeddings_model DONE 


 [LOG] OK !! get_startup_data_vector_store_retriever DONE 


 [LOG] OK !! get_legal_data_vector_store_retriever DONE 


 [LOG] OK !! get_startup_masterclass_vector_store_retriever DONE 


 [LOG] OK !! get_startup_data_vector_store_retriever DONE 


 [LOG] OK !! get_legal_data_vector_store_retriever DONE 


 [LOG] OK !! get_startup_masterclass_vector_store_retriever DONE 



INFO:backoff:Backing off send_request(...) for 0.9s (requests.exceptions.ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


In [31]:
def invoke_startup_data_rag_chain(input, **kwargs):
    """Invoke the startup-data RAG chain and return its raw result.

    Args:
        input: Text forwarded to the chain under the "input" key.
        **kwargs: Optional "chat_history" entry; an empty list is used when absent.

    Returns:
        Whatever ``startup_data_rag_chain.invoke`` returns, unmodified.
    """
    history = kwargs.get("chat_history", [])
    payload = {"input": input, "chat_history": history}
    return startup_data_rag_chain.invoke(payload)

def invoke_legal_data_rag_chain(input, **kwargs):
    """Invoke the legal-data RAG chain and return its raw result.

    Args:
        input: Text forwarded to the chain under the "input" key.
        **kwargs: Optional "chat_history" entry; an empty list is used when absent.

    Returns:
        Whatever ``legal_data_rag_chain.invoke`` returns, unmodified.
    """
    history = kwargs.get("chat_history", [])
    payload = {"input": input, "chat_history": history}
    return legal_data_rag_chain.invoke(payload)

def invoke_startup_masterclass_chain(input, **kwargs):
    """Invoke the startup-masterclass RAG chain and return its raw result.

    Args:
        input: Text forwarded to the chain under the "input" key.
        **kwargs: Optional "chat_history" entry; an empty list is used when absent.

    Returns:
        Whatever ``startup_masterclass_rag_chain.invoke`` returns, unmodified.
    """
    history = kwargs.get("chat_history", [])
    payload = {"input": input, "chat_history": history}
    return startup_masterclass_rag_chain.invoke(payload)

# Tools offered to the agent. Each entry pairs one of the chain wrapper
# functions with the name and description the agent sees when picking an action.
tools = [
    Tool(
        func=invoke_startup_data_rag_chain,
        name="Startup Data Answer Question",
        description="useful for when you need to answer questions about the context",
    ),
    Tool(
        func=invoke_legal_data_rag_chain,
        name="Legal Data Answer Question",
        description="useful for when you need to answer questions about legal information, regulations, or legal documents",
    ),
    Tool(
        func=invoke_startup_masterclass_chain,
        name="StartupMasterclass",
        description="useful for when you need expert guidance on startup fundamentals, product development, fundraising, growth tactics, team building, operations, and entrepreneurship strategies from comprehensive course materials",
    ),
]

# ReAct agent assembled from the shared LLM, the tool list and the hub prompt.
agent = create_react_agent(llm=llm, tools=tools, prompt=react_docstore_prompt)

# Executor that drives the agent; it is configured to handle output-parsing
# errors itself and to print a verbose trace of each run.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
)

In [32]:
def evaluate_rag_pipeline(query, retriever, reference=None):
    """Answer ``query`` with the agent and score the answer.

    The query is sent to ``retriever`` to collect context documents and to the
    module-level ``agent_executor`` to produce an answer. The answer is scored
    with four RAGAS metrics (faithfulness, answer relevancy, context precision,
    context recall) plus BLEU and ROUGE from the ``evaluate`` library.

    Side effects: appends the query and the answer to the module-level
    ``chat_history`` list and prints the dataset and all scores.

    Args:
        query: The user question to evaluate.
        retriever: Retriever whose documents are used as the retrieved contexts.
        reference: Optional ground-truth answer. When omitted, the first
            retrieved chunk is used as the RAGAS reference and the concatenation
            of all retrieved chunks as the BLEU/ROUGE reference. That fallback
            scores retrieval against its own output, so context precision and
            recall are satisfied almost by construction; pass a real reference
            for meaningful values.

    Returns:
        dict with keys "faithfulness", "answer_relevancy", "context_precision",
        "context_recall", "bleu_score" and "rouge_score". A RAGAS metric whose
        evaluation job fails is reported as NaN.
    """
    # Retrievers are LangChain Runnables; `invoke` replaces the deprecated
    # `get_relevant_documents`.
    retrieved_docs = retriever.invoke(query)
    retrieved_contexts = [doc.page_content for doc in retrieved_docs]

    agent_result = agent_executor.invoke({"input": query, "chat_history": chat_history})
    answer = agent_result['output']

    chat_history.append(HumanMessage(content=query))
    chat_history.append(AIMessage(content=answer))

    # Pick the reference texts: caller-supplied ground truth wins, otherwise
    # fall back to the retrieved context.
    if reference is not None:
        reference_text = reference
        overlap_reference = reference
    elif retrieved_contexts:
        reference_text = retrieved_contexts[0]
        overlap_reference = " ".join(retrieved_contexts)
    else:
        reference_text = "No reference available"
        overlap_reference = "No reference available"

    eval_data = Dataset.from_list([{
        "user_input": query,
        "retrieved_contexts": retrieved_contexts,
        "response": answer,
        "reference": reference_text
    }])

    print("##########################################")
    print(eval_data)
    print(eval_data['reference'])
    print(reference_text)
    print("##########################################")

    results = ragas_evaluate(
        eval_data,
        metrics=[
            faithfulness,
            answer_relevancy,
            context_precision,
            context_recall
        ]
    )

    # The dataset has a single row, so each metric column holds one value.
    results_df = results.to_pandas()
    faithfulness_score = results_df["faithfulness"].iloc[0]
    answer_relevancy_score = results_df["answer_relevancy"].iloc[0]
    context_precision_score = results_df["context_precision"].iloc[0]
    context_recall_score = results_df["context_recall"].iloc[0]

    # Load BLEU and ROUGE using the evaluate library
    bleu = evaluate.load("bleu")
    rouge = evaluate.load("rouge")

    hypothesis = [answer]

    # Metric failures are reported and replaced by zero scores so one bad
    # sample does not abort the whole evaluation.
    try:
        # BLEU takes a list of reference lists, one per prediction.
        bleu_score = bleu.compute(predictions=hypothesis, references=[[overlap_reference]])['bleu']
    except Exception as e:
        print(f"BLEU computation error: {e}")
        bleu_score = 0.0

    try:
        rouge_score = rouge.compute(predictions=hypothesis, references=[overlap_reference])
    except Exception as e:
        print(f"ROUGE computation error: {e}")
        rouge_score = {"rouge1": 0.0, "rouge2": 0.0, "rougeL": 0.0}

    # Print evaluation results
    print(f"Query: {query}")
    print(f"Generated Answer: {answer}")
    print("\n--- RAGAS Evaluation ---")
    print(f"Faithfulness: {faithfulness_score:.4f}")
    print(f"Answer Relevancy: {answer_relevancy_score:.4f}")
    print(f"Context Precision: {context_precision_score:.4f}")
    print(f"Context Recall: {context_recall_score:.4f}")
    print("\n--- Additional Metrics ---")
    print(f"BLEU Score: {bleu_score:.4f}")
    print(f"ROUGE Scores: {rouge_score}")

    # Return evaluation metrics as a dictionary
    return {
        "faithfulness": faithfulness_score,
        "answer_relevancy": answer_relevancy_score,
        "context_precision": context_precision_score,
        "context_recall": context_recall_score,
        "bleu_score": bleu_score,
        "rouge_score": rouge_score
    }

In [33]:
# Evaluation prompts, addressed by index in the cells that follow.
# NOTE(review): several prompts contain typos ("registeration", "some some",
# "starts"). They are kept verbatim because they are sent to the agent as-is;
# confirm whether the misspellings are intentional.
test_questions = [
    # 0: evaluated with retriever_legal_data
    "which legal documents required for the new startup for registeration?",
    # 1: evaluated with retriever_startup_data
    "Tell me NLP based startups ",
    # 2: evaluated with retriever_startup_data
    "Tell me some some computer vision based starts",
    # 3: evaluated with retriever_legal_data
    "How to register the new startup?",
    # 4: evaluated with retriever_startup_masterclass
    "What advice does the startup course give about sales and marketing for new companies?",
    # 5: evaluated with retriever_startup_masterclass
    "What are the key elements of building a product users love?"
]

In [34]:
# Question 0, with contexts taken from the legal-data retriever.
query = test_questions[0]
evaluation_results = evaluate_rag_pipeline(query=query, retriever=retriever_legal_data)
print("\nFinal Evaluation Results:", evaluation_results)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should check the legal requirements for registering a new startup
Action: Legal Data Answer Question
Action Input: legal documents required for startup registration[0m[33;1m[1;3m{'input': 'legal documents required for startup registration', 'chat_history': [], 'context': [Document(metadata={'source': 'D:\\Data\\Circle\\BOT\\Data\\cleaned\\Companies_Act_2017_cleaned.txt'}, page_content='Section 16:\n16.  Registration of memorandum and articles.—(1) There shall be \nfiled with the registrar an application on the specified form containing the \nfollowing information and documents for incorporation of a company, namely:—  \n \n(a) a declaration on the specified form, by an authorized intermediary \nor by a person named in the articles as a director, of compliance \nwith all or any of the requirements of this Act and the rules and \nregulations made thereunder in respect of registration and matters \nprecedent or incidental t

Evaluating: 100%|██████████| 4/4 [00:14<00:00,  3.61s/it]


Query: which legal documents required for the new startup for registeration?
Generated Answer: For the registration of a startup, legal documents required include a declaration of compliance with the law, a memorandum of association signed by all subscribers, articles of association (in certain cases), and an address for correspondence until the registered office is established. Once these documents are submitted, the registrar will review them for compliance with the law and issue a certificate of incorporation if all requirements are met. If there are any deficiencies, the registrar may request revisions within a specified period.

--- RAGAS Evaluation ---
Faithfulness: 1.0000
Answer Relevancy: 0.9702
Context Precision: 1.0000
Context Recall: 1.0000

--- Additional Metrics ---
BLEU Score: 0.0000
ROUGE Scores: {'rouge1': 0.09770992366412214, 'rouge2': 0.047400611620795105, 'rougeL': 0.07633587786259541, 'rougeLsum': 0.09465648854961832}

Final Evaluation Results: {'faithfulness': 1.0,

In [35]:
# Question 1, with contexts taken from the startup-data retriever.
query = test_questions[1]
evaluation_results = evaluate_rag_pipeline(query=query, retriever=retriever_startup_data)
print("\nFinal Evaluation Results:", evaluation_results)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use Startup Data Answer Question to find NLP based startups
Action: Startup Data Answer Question
Action Input: "List of NLP based startups"[0m[36;1m[1;3m{'input': 'List of NLP based startups', 'chat_history': [], 'context': [Document(metadata={'id': 2462}, page_content="company_name: NuMind \n long_description: NuMind is a tool for data scientists, data analysts, but also software engineers to create custom NLP models. For example, a recruiting company uses NuMind to find which job offers best match a given resume. Etienne (CEO) was head of Machine Learning at Wolfram Research, and Samuel (CTO) co-founded Make.org (8M users). NuMind originated from our own frustration when developing NLP models. Leveraging large language models similar to GPT-3, NuMind allows to complete NLP projects at least 10x faster than before. We launched a private beta August 2 and had 9 paying customers one month later. \n batch: S22 \n st

Evaluating: 100%|██████████| 4/4 [00:14<00:00,  3.72s/it]


Query: Tell me NLP based startups 
Generated Answer: Some NLP-based startups include NuMind.

--- RAGAS Evaluation ---
Faithfulness: 0.5000
Answer Relevancy: 0.9451
Context Precision: 1.0000
Context Recall: 1.0000

--- Additional Metrics ---
BLEU Score: 0.0000
ROUGE Scores: {'rouge1': 0.023809523809523808, 'rouge2': 0.0, 'rougeL': 0.023809523809523808, 'rougeLsum': 0.023809523809523808}

Final Evaluation Results: {'faithfulness': 0.5, 'answer_relevancy': 0.9451209680003139, 'context_precision': 0.9999999999, 'context_recall': 1.0, 'bleu_score': 0.0, 'rouge_score': {'rouge1': 0.023809523809523808, 'rouge2': 0.0, 'rougeL': 0.023809523809523808, 'rougeLsum': 0.023809523809523808}}


In [36]:
# Question 2, with contexts taken from the startup-data retriever.
query = test_questions[2]
evaluation_results = evaluate_rag_pipeline(query=query, retriever=retriever_startup_data)
print("\nFinal Evaluation Results:", evaluation_results)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use Startup Data Answer Question to gather information on computer vision based startups
Action: Startup Data Answer Question
Action Input: "computer vision based startups"[0m[36;1m[1;3m{'input': 'computer vision based startups', 'chat_history': [], 'context': [Document(metadata={'id': 1774}, page_content="company_name: Jido Maps \n long_description: We help teams that may or may not have machine learning expertise quickly turn their data into deployed computer vision models. Example applications include security camera monitoring, automating data entry, interpreting web scraped images, validated user photo inputs, augmented reality and mobile product scanning. \r\n\r\nIf you have visual or scanned data that you wish your software could interpret at scale, we can turn around a first proof of concept in under a week. Reach out and see how computer vision can change your business. \n batch: W18 \n status: Active , A

Evaluating: 100%|██████████| 4/4 [00:12<00:00,  3.10s/it]


Query: Tell me some some computer vision based starts
Generated Answer: Jido Maps is a computer vision based startup specializing in deploying computer vision models quickly for various applications.

--- RAGAS Evaluation ---
Faithfulness: 0.0000
Answer Relevancy: 0.7337
Context Precision: 1.0000
Context Recall: 1.0000

--- Additional Metrics ---
BLEU Score: 0.0000
ROUGE Scores: {'rouge1': 0.06666666666666667, 'rouge2': 0.028846153846153844, 'rougeL': 0.04761904761904762, 'rougeLsum': 0.05714285714285714}

Final Evaluation Results: {'faithfulness': 0.0, 'answer_relevancy': 0.7337070385471683, 'context_precision': 0.9999999999, 'context_recall': 1.0, 'bleu_score': 0.0, 'rouge_score': {'rouge1': 0.06666666666666667, 'rouge2': 0.028846153846153844, 'rougeL': 0.04761904761904762, 'rougeLsum': 0.05714285714285714}}


In [37]:
# Question 3, with contexts taken from the legal-data retriever.
query = test_questions[3]
evaluation_results = evaluate_rag_pipeline(query=query, retriever=retriever_legal_data)
print("\nFinal Evaluation Results:", evaluation_results)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mYou need to understand the legal requirements and steps involved in registering a new startup.
Action: Legal Data Answer Question
Action Input: "What are the legal requirements and steps to register a new startup?"[0m[33;1m[1;3m{'input': 'What are the legal requirements and steps to register a new startup?', 'chat_history': [], 'context': [Document(metadata={'source': 'D:\\Data\\Circle\\BOT\\Data\\cleaned\\Companies_Act_2017_cleaned.txt'}, page_content='Section 16:\n16.  Registration of memorandum and articles.—(1) There shall be \nfiled with the registrar an application on the specified form containing the \nfollowing information and documents for incorporation of a company, namely:—  \n \n(a) a declaration on the specified form, by an authorized intermediary \nor by a person named in the articles as a director, of compliance \nwith all or any of the requirements of this Act and the rules and \nregulations made thereunder

Evaluating: 100%|██████████| 4/4 [00:16<00:00,  4.16s/it]


Query: How to register the new startup?
Generated Answer: To register a new startup, you need to file an application with the registrar containing specific documents and information, and upon satisfaction with compliance, the registrar will issue a certificate of incorporation.

--- RAGAS Evaluation ---
Faithfulness: 1.0000
Answer Relevancy: 0.9671
Context Precision: 1.0000
Context Recall: 1.0000

--- Additional Metrics ---
BLEU Score: 0.0000
ROUGE Scores: {'rouge1': 0.04092071611253197, 'rouge2': 0.017079419299743808, 'rougeL': 0.03580562659846548, 'rougeLsum': 0.037510656436487634}

Final Evaluation Results: {'faithfulness': 1.0, 'answer_relevancy': 0.967098933166583, 'context_precision': 0.9999999999666667, 'context_recall': 1.0, 'bleu_score': 0.0, 'rouge_score': {'rouge1': 0.04092071611253197, 'rouge2': 0.017079419299743808, 'rougeL': 0.03580562659846548, 'rougeLsum': 0.037510656436487634}}


In [38]:
# Question 4, with contexts taken from the startup-masterclass retriever.
query = test_questions[4]
evaluation_results = evaluate_rag_pipeline(query=query, retriever=retriever_startup_masterclass)
print("\nFinal Evaluation Results:", evaluation_results)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use the StartupMasterclass tool to get expert guidance on sales and marketing strategies for new companies.
Action: StartupMasterclass
Action Input: Sales and marketing course materials[0m[38;5;200m[1;3m{'input': 'Sales and marketing course materials', 'chat_history': [], 'context': [Document(metadata={'page': 310, 'source': 'D:/Data/Circle/BOT/Data/Startup Course Text Data.pdf'}, page_content='A quick background on Clever: we build software for schools. We are an app platform for \ndevelopers that is used today by about one in five schools in America. We started it about \ntwo years ago. \n  \nSales has been key. I want to use this time to share some of the things that have worked for \nme along the way. Of course, there\'s a million ways to do this, so you\'ll find what works for \nyou. \n  \nFirst I want to start about how I used to perceive sales. A lot of people see sales as having \nmystique around it. It\'s

Evaluating:  25%|██▌       | 1/4 [00:03<00:11,  3.96s/it]ERROR:ragas.executor:Exception raised in Job[3]: LLMDidNotFinishException(The LLM generation was not completed. Please increase the max_tokens and try again.)
Evaluating: 100%|██████████| 4/4 [00:09<00:00,  2.39s/it]


Query: What advice does the startup course give about sales and marketing for new companies?
Generated Answer: The startup course provides actionable insights on sales and marketing strategies for new companies, emphasizing the founder's role in sales, dedicating time to sales activities, understanding the sales funnel, leveraging growth marketing, experimenting and iterating, and focusing on hyper growth for long-term success.

--- RAGAS Evaluation ---
Faithfulness: 0.7143
Answer Relevancy: 0.9440
Context Precision: 0.3333
Context Recall: nan

--- Additional Metrics ---
BLEU Score: 0.0000
ROUGE Scores: {'rouge1': 0.03871829105473966, 'rouge2': 0.005347593582887701, 'rougeL': 0.025367156208277702, 'rougeLsum': 0.036048064085447265}

Final Evaluation Results: {'faithfulness': 0.7142857142857143, 'answer_relevancy': 0.9440476338292125, 'context_precision': 0.3333333333, 'context_recall': nan, 'bleu_score': 0.0, 'rouge_score': {'rouge1': 0.03871829105473966, 'rouge2': 0.005347593582887701

In [39]:
# Question 5, with contexts taken from the startup-masterclass retriever.
query = test_questions[5]
evaluation_results = evaluate_rag_pipeline(query=query, retriever=retriever_startup_masterclass)
print("\nFinal Evaluation Results:", evaluation_results)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should seek expert guidance on product development to understand the key elements.
Action: StartupMasterclass
Action Input: Product development course materials[0m[38;5;200m[1;3m{'input': 'Product development course materials', 'chat_history': [], 'context': [Document(metadata={'page': 21, 'source': 'D:/Data/Circle/BOT/Data/Startup Course Text Data.pdf'}, page_content="Lecture 2: Ideas, Products, Teams and Execution Part II \nBefore I jump into today's lecture, I wanted to answer a few questions people had emailed \nme about the last lecture that we didn't have time for. So, if you have a question about \nwhat we covered last time, I am welcome to answer it now, starting with you. \n  \nQ: How do I identify if a market has a fast growth rate now and also for the next ten years? \n  \nA: The good news about this is this is one of the big advantages students have. You should \njust trust your instincts on this. Older peopl

Evaluating:  25%|██▌       | 1/4 [00:03<00:10,  3.58s/it]ERROR:ragas.executor:Exception raised in Job[3]: LLMDidNotFinishException(The LLM generation was not completed. Please increase the max_tokens and try again.)
Evaluating: 100%|██████████| 4/4 [00:07<00:00,  1.94s/it]


Query: What are the key elements of building a product users love?
Generated Answer: Actionable insights for product development include immersing yourself in the industry, learning by doing, iterating and pivoting, addressing challenges, and trusting your instincts.

--- RAGAS Evaluation ---
Faithfulness: 0.4000
Answer Relevancy: 0.8641
Context Precision: 1.0000
Context Recall: nan

--- Additional Metrics ---
BLEU Score: 0.0000
ROUGE Scores: {'rouge1': 0.017106200997861726, 'rouge2': 0.0028551034975017845, 'rougeL': 0.014255167498218105, 'rougeLsum': 0.015680684248039915}

Final Evaluation Results: {'faithfulness': 0.4, 'answer_relevancy': 0.864138075680561, 'context_precision': 0.9999999999666667, 'context_recall': nan, 'bleu_score': 0.0, 'rouge_score': {'rouge1': 0.017106200997861726, 'rouge2': 0.0028551034975017845, 'rougeL': 0.014255167498218105, 'rougeLsum': 0.015680684248039915}}
