#### Library Imports

In [1]:
import pandas as pd
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
import weaviate
from langchain_weaviate.vectorstores import WeaviateVectorStore
from weaviate.classes.query import Filter
from pymongo import MongoClient
from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
from langchain.retrievers import ContextualCompressionRetriever
from flashrank import Ranker, RerankRequest
from typing import Optional

import warnings
warnings.filterwarnings("ignore")

import os  # used only to read connection settings from the environment

# Client for the Weaviate instance listening locally on port 8081.
# NOTE(review): nothing in this notebook closes this client; consider calling
# weaviate_client.close() once the notebook is finished with it.
weaviate_client = weaviate.connect_to_local(port=8081)
# Embedding model handed to the vector store; model files are cached under ./embedding_model.
embeddings = HuggingFaceEmbeddings(model_name="intfloat/e5-large-v2", cache_folder="./embedding_model")
# MongoDB connection settings. The URI carries credentials, so it can be supplied through
# the MONGO_URI environment variable; the fallback keeps the previous local default.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://root:root@localhost:27017/")
DATABASE_NAME = "incident_db"
COLLECTION_NAME = "incident_collection"

In [2]:
from pydantic import root_validator

class CustomReranker(BaseDocumentCompressor):
    """Document compressor that reranks documents with a FlashRank ``Ranker``.

    The compressor scores every candidate document against the query and keeps
    the first ``top_n`` entries of the ranker's response.
    """

    client: Ranker
    """Flashrank client to use for compressing documents"""
    top_n: int = 3
    """Number of documents to return."""
    model: Optional[str] = None
    """Model to use for reranking."""

    class Config:
        extra = 'forbid'
        arbitrary_types_allowed = True

    @root_validator(pre=True)
    def validate_environment(cls, values):
        """Fill in the default model name and make sure a FlashRank client exists.

        Args:
            values: Raw constructor keyword arguments.

        Returns:
            The same mapping with ``model`` and ``client`` populated.

        Raises:
            ImportError: If the ``flashrank`` package cannot be imported.
        """
        try:
            from flashrank import Ranker
        except ImportError as exc:
            raise ImportError(
                "Could not import flashrank python package. "
                "Please install it with `pip install flashrank`."
            ) from exc

        # `or` (instead of a `.get` default) also covers an explicit `model=None`,
        # which would otherwise be forwarded to Ranker as the model name.
        values["model"] = values.get("model") or "ms-marco-MiniLM-L-12-v2"
        # Keep a client supplied by the caller; only build one when none was given.
        if values.get("client") is None:
            values["client"] = Ranker(model_name=values["model"], cache_dir="reranker")
        return values

    def compress_documents(
        self,
        documents,
        query,
        callbacks = None):
        """Rerank ``documents`` against ``query`` and keep the first ``top_n`` results.

        Args:
            documents: Candidate documents exposing ``page_content`` and ``metadata``.
            query: Query text the documents are scored against.
            callbacks: Accepted for interface compatibility; not used here.

        Returns:
            A list of new ``Document`` objects. Each one carries the original
            metadata plus ``id`` (the document's position in ``documents``) and
            ``relevance_score`` (the score reported by the ranker).
        """
        # Nothing to rank: skip the model call entirely.
        if not documents:
            return []
        passages = [
            {"id": i, "text": doc.page_content, "metadata": doc.metadata} for i, doc in enumerate(documents)
        ]
        rerank_request = RerankRequest(query=query, passages=passages)
        # NOTE(review): the slice assumes the ranker returns results best-first - confirm.
        rerank_response = self.client.rerank(rerank_request)[:self.top_n]
        return [
            Document(
                page_content=r["text"],
                metadata={
                    **r['metadata'],
                    "id": r["id"],
                    "relevance_score": r["score"]
                },
            )
            for r in rerank_response
        ]

#### Build Chatbot

In [3]:
# Shared reranker used by every retriever built below; constructing it also
# creates the FlashRank client (see CustomReranker.validate_environment).
compressor = CustomReranker()

def create_retriever(industries):
    """Build a reranking retriever over the ``incident`` Weaviate index.

    Args:
        industries: The sentinel string ``'all'`` for an unfiltered search, a
            single industry name, or an iterable of industry names. Documents
            are kept when their ``industry`` property equals any given name.

    Returns:
        A ``ContextualCompressionRetriever`` that runs an MMR search
        (``fetch_k=20``) and passes the hits through the shared ``compressor``.
    """
    filters = None
    if industries != 'all':
        # A bare string would otherwise be iterated character by character,
        # producing one equality filter per letter.
        if isinstance(industries, str):
            industries = [industries]
        filters = Filter.any_of([Filter.by_property("industry").equal(industry) for industry in industries])
    db = WeaviateVectorStore(client=weaviate_client, index_name="incident", text_key="text", embedding=embeddings)
    return ContextualCompressionRetriever(
        base_compressor = compressor,
        base_retriever = db.as_retriever(search_type="mmr", search_kwargs={"fetch_k": 20, 'filters': filters})
    )

def get_documents_ids(retrieved_docs):
    """Collect the integer ``incident_id`` of every retrieved document.

    Args:
        retrieved_docs: Documents whose ``metadata`` contains ``'incident_id'``.

    Returns:
        A list of ids in input order, or ``None`` when ``retrieved_docs`` is
        empty or ``None``.
    """
    if not retrieved_docs:
        return None
    incident_ids = []
    for retrieved in retrieved_docs:
        incident_ids.append(int(retrieved.metadata['incident_id']))
    return incident_ids

def get_documents_by_ids(ids):
    """Fetch the incident records whose ``accident_id`` is in ``ids`` from MongoDB.

    Args:
        ids: Collection of ``accident_id`` values to look up. ``None`` or an
            empty collection short-circuits without contacting the database.

    Returns:
        A list of matching documents. An empty list is returned when there is
        nothing to look up or when the lookup fails; the failure is logged.
    """
    import logging

    # No ids means nothing to query (and avoids sending `$in: None` to MongoDB).
    if not ids:
        return []

    # Bound before the `try` so `finally` can tell whether a client was created;
    # previously a failing MongoClient(...) made `client.close()` raise instead.
    client = None
    try:
        client = MongoClient(MONGO_URI)
        collection = client[DATABASE_NAME][COLLECTION_NAME]
        return list(collection.find({"accident_id": {"$in": list(ids)}}))
    except Exception as exc:
        # Best-effort lookup: keep returning an empty result, but leave a trace.
        logging.getLogger(__name__).warning("MongoDB lookup failed: %s", exc)
        return []
    finally:
        if client is not None:
            client.close()

In [4]:
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
import json
from datetime import datetime

import os  # the Groq credential is read from the environment, not stored in the notebook

# Chat model served through Groq's OpenAI-compatible endpoint.
# The API key must be provided in the GROQ_API_KEY environment variable. Any key
# that was previously committed in this notebook should be treated as leaked and revoked.
llm = ChatOpenAI(
  openai_api_base="https://api.groq.com/openai/v1/",
  model = "llama-3.3-70b-versatile",
  temperature=0.7,
  api_key=os.environ["GROQ_API_KEY"]
)


# Prompt text for the answer chain. It has two placeholders: {context} (the
# retrieved incident records) and {question} (the user's question).
SYSTEM_TEMPLATE = """
<|start_header_id|>system<|end_header_id|>
You are IncidentNavigator, an AI designed to assist in managing and understanding incidents using a dataset of incident records. Your role is to provide precise, concise, and clear responses based on the context of the documents you receive. If a question falls outside of the information available in the provided context, you should clearly state that you cannot provide an answer but will offer the best response based on what is available.
The documents you process include the following fields:
- accident_id: Unique identifier for each incident.
- event_type: Category of the incident (e.g., fire, collision).
- industry_type: The sector or industry where the incident occurred (e.g., construction, transportation).
- accident_title: A brief, descriptive title for the accident.
- start_date: The date and time the incident began.
- finish_date: The date and time the incident ended or was resolved.
- accident_description: A detailed account of how the accident occurred.
- causes_of_accident: Factors or conditions leading to the incident.
- consequences: Outcomes or impacts of the incident (e.g., injuries, damage).
- emergency_response: Immediate actions taken to manage the incident.
- lesson_learned: Insights or recommendations for future prevention.
- url: Reference link to the document webpage.
When answering questions, follow these guidelines:
- Context Provided: If the context includes information related to these fields, provide a direct and detailed response based on the relevant data.
- Context Missing or Insufficient: If no context or relevant information is provided:
  - State that you cannot provide a definitive answer because the requester does not have sufficient privileges or the information is unavailable.
  - Do not speculate but offer a general response or guidance based on the type of question, when possible.
Context: {context}
IMPORTANT: KEEP YOUR ANSWERS AS CONCISE AND RELEVANT AS POSSIBLE, DON'T GIVE OUT UNNECESSARILY LONG ANSWERS.
<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Question: {question}
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
Answer:
"""

# Turn the raw template text into a prompt object; its inputs are the
# {context} and {question} placeholders found in SYSTEM_TEMPLATE.
SYSTEM_PROMPT = ChatPromptTemplate.from_template(SYSTEM_TEMPLATE)

class CustomJSONEncoder(json.JSONEncoder):
  """JSON encoder that writes ``datetime`` values as ISO-8601 strings."""

  def default(self, obj):
    """Encode datetimes via ``isoformat()``; defer every other type to the base class."""
    if not isinstance(obj, datetime):
      return super().default(obj)
    return obj.isoformat()
  
def retrieve(data):
  """Attach the incident records relevant to ``data['question']`` as ``data['context']``.

  The question is sent through the reranking retriever (no industry filter),
  the ``incident_id`` of each hit is collected, and the full records are
  loaded from MongoDB with their ``_id`` field removed.

  Args:
    data: Mapping containing a ``'question'`` entry. It is updated in place.

  Returns:
    The same ``data`` mapping, now with a ``'context'`` list of records.
  """
  query = data['question']
  retriever = create_retriever('all')
  docs = retriever.invoke(query)
  ids = get_documents_ids(docs)
  retrieved_docs = get_documents_by_ids(ids)

  # A MongoDB `$in` query does not return matches in the order of `ids`, which
  # would discard the order produced by the reranker. Restore it using the
  # first position at which each id appears; unknown ids go last.
  rank_by_id = {}
  for position, incident_id in enumerate(ids or []):
    rank_by_id.setdefault(incident_id, position)
  retrieved_docs.sort(
    key=lambda document: rank_by_id.get(document.get("accident_id"), len(rank_by_id))
  )

  for document in retrieved_docs:
    # Drop MongoDB's internal identifier before the record is used as context.
    document.pop("_id", None)
  data['context'] = retrieved_docs
  return data

def get_industry(placeholder = None):
  """Return a fresh list with the two fixed industry names.

  The ``placeholder`` argument is accepted but not used.
  """
  industries = ['processing of metals', 'power generation']
  return industries

#### Evaluation

**Create the test set**

In [None]:
# Load the evaluation questions; the loops below read the "Input" column of this frame.
test_set = pd.read_csv("data/test.csv")
test_set

In [None]:
import time

def get_answer_and_context(question):
    """Answer ``question`` with the RAG chain.

    Args:
        question: Question text to retrieve context for and to answer.

    Returns:
        A ``(context, answer)`` tuple: the records placed in the prompt and the
        model's reply as parsed by ``StrOutputParser``.
    """
    payload = retrieve({"question": question})
    pipeline = SYSTEM_PROMPT | llm | StrOutputParser()
    generated = pipeline.invoke(payload)
    return payload['context'], generated

# Context and answer produced by the RAG chain for every test question,
# kept in the same order as the rows of `test_set`.
retrieved_contexts = []
responses = []

for _, row in test_set.iterrows():
    # Wait a minute before each request (presumably to stay under the API rate limit).
    time.sleep(60)
    # Named `question` so the builtin `input` is not shadowed at notebook scope.
    question = row["Input"]
    context, response = get_answer_and_context(question)
    retrieved_contexts.append(context)
    responses.append(response)

len(retrieved_contexts), len(responses)

In [None]:
# Baseline answers: the same questions sent straight to the model, without retrieval.
base_responses = []

for _, row in test_set.iterrows():
    # Wait a minute before each request (presumably to stay under the API rate limit).
    time.sleep(60)
    question = row["Input"]
    # `llm.invoke` returns a chat message object; keep only its text so the
    # "Base Response" column holds the answer and not the message's repr.
    base_responses.append(llm.invoke(question).content)

In [11]:
# Attach the collected outputs to the test set, one column per list, then persist it.
result_columns = {
    "Retrieved Context": retrieved_contexts,
    "Response": responses,
    "Base Response": base_responses,
}
for column_name, column_values in result_columns.items():
    test_set[column_name] = column_values
test_set.to_csv("data/test_results.csv", index=False)

**Get metrics**

In [41]:
import pandas as pd
import json

class CustomJSONEncoder(json.JSONEncoder):
  """JSON encoder that writes ``datetime`` values as ISO-8601 strings.

  NOTE(review): this repeats the encoder defined earlier in the notebook;
  consider keeping a single definition.
  """

  def default(self, obj):
      """Encode datetimes via ``isoformat()``; defer every other type to the base class."""
      # Imported here because this cell only imports pandas and json, so the
      # name would be undefined when the metrics section runs on a fresh kernel.
      from datetime import datetime

      if isinstance(obj, datetime):
          return obj.isoformat()
      return super().default(obj)

def convert_context_to_string(context):
    """Turn one stored context cell into a list of JSON strings.

    Args:
        context: Text of a Python expression that evaluates to an iterable of
            JSON-serialisable records.

    Returns:
        A list with one JSON string per record, encoded with ``CustomJSONEncoder``.
    """
    # NOTE(review): eval() executes whatever the CSV cell contains. Only use this
    # on files produced by this notebook; ast.literal_eval would be a safer parser
    # if the stored records are plain literals - confirm before switching.
    records = eval(context)
    return [json.dumps(record, cls=CustomJSONEncoder) for record in records]

In [42]:
test_set = pd.read_csv("data/test_results.csv")

# Evaluation frame: columns renamed to the SingleTurnSample field names they feed,
# with each stored context converted into a list of JSON strings.
ragas_columns = {
    "Input": "user_input",
    "Retrieved Context": "retrieved_contexts",
    "Response": "response",
    "Reference": "reference",
}
eval_dataset = test_set[list(ragas_columns)].rename(columns=ragas_columns)
eval_dataset["retrieved_contexts"] = eval_dataset["retrieved_contexts"].apply(convert_context_to_string)

# Spreadsheet with the question and the three candidate answers under neutral labels.
reviewer_columns = {
    "Input": "Question",
    "Response": "Reponse A",
    "Reference": "Reponse B",
    "Base Response": "Reponse C",
}
user_dataset = test_set[list(reviewer_columns)].rename(columns=reviewer_columns)
user_dataset.to_excel("data/user_dataset.xlsx", index=False)

In [43]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from ragas.metrics import LLMContextPrecisionWithoutReference, LLMContextRecall, ResponseRelevancy, Faithfulness
from ragas import SingleTurnSample
from langchain_openai.chat_models import ChatOpenAI
import time

import os  # the Groq credential is read from the environment, not stored in the notebook

# Judge model used by the ragas metrics, served through Groq's OpenAI-compatible endpoint.
# The API key must be provided in the GROQ_API_KEY environment variable. Any key
# that was previously committed in this notebook should be treated as leaked and revoked.
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(
  openai_api_base="https://api.groq.com/openai/v1/",
  model = "llama3-70b-8192",
  temperature=0.7,
  api_key=os.environ["GROQ_API_KEY"]
))

# Embedding model for the relevancy metric; same model name and cache folder as the
# `embeddings` object created in the first cell.
evaluator_embeddings = LangchainEmbeddingsWrapper(HuggingFaceEmbeddings(model_name="intfloat/e5-large-v2", cache_folder="./embedding_model"))

# Metrics computed for every sample. All four use the judge LLM; ResponseRelevancy
# additionally uses the embedding model.
metrics = [
  LLMContextPrecisionWithoutReference(llm=evaluator_llm),
  LLMContextRecall(llm=evaluator_llm),
  ResponseRelevancy(llm=evaluator_llm, embeddings=evaluator_embeddings),
  Faithfulness(llm=evaluator_llm)
]


# One score list per metric, filled in sample order.
context_precision = []
context_recall = []
response_relevancy = []
faithfulness = []

# Destination list for each metric name; any other name is stored as faithfulness.
score_lists = {
  "llm_context_precision_without_reference": context_precision,
  "context_recall": context_recall,
  "answer_relevancy": response_relevancy,
}

# Only the rows from position 21 onwards are scored by this cell.
for index, row in eval_dataset[21:].iterrows():
  sample = SingleTurnSample(
    user_input=row['user_input'],
    response=row["response"],
    retrieved_contexts=row["retrieved_contexts"],
    reference=row["reference"],
  )
  print(f"Processing sample {index + 1}")
  for metric in metrics:
    score = metric.single_turn_score(sample)
    score_lists.get(metric.name, faithfulness).append(score)
    print(f"{metric.name}: {score}")
    # Pause after every metric call (presumably to stay under the API rate limit).
    time.sleep(120)

# Keep the scored rows only, add one column per metric and save this part.
eval_dataset = eval_dataset[21:]
score_columns = (
  ("Context Precision", context_precision),
  ("Context Recall", context_recall),
  ("Response Relevancy", response_relevancy),
  ("Faithfulness", faithfulness),
)
for column_name, scores in score_columns:
  eval_dataset[column_name] = scores
eval_dataset.to_csv("data/eval_results_part3.csv", index=False)

Processing sample 22
llm_context_precision_without_reference: 0.9999999999666667
context_recall: 0.625
answer_relevancy: 0.968630146081857
faithfulness: 0.8888888888888888
Processing sample 23
llm_context_precision_without_reference: 0.8333333332916666
context_recall: 0.5714285714285714
answer_relevancy: 0.0
faithfulness: 0.75
Processing sample 24
llm_context_precision_without_reference: 0.0
context_recall: 0.7142857142857143
answer_relevancy: 0.9555402414821357
faithfulness: 0.875


In [45]:
# Each part file holds the scores of one slice of the evaluation set.
df1 = pd.read_csv("data/eval_results_part1.csv")
df2 = pd.read_csv("data/eval_results_part2.csv")
df3 = pd.read_csv("data/eval_results_part3.csv")


# ignore_index gives the combined frame one continuous row index; without it each
# part keeps its own labels starting at 0, so the merged frame has duplicate labels.
df = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
df.to_csv("data/eval_results.csv", index=False)
df

Unnamed: 0,user_input,retrieved_contexts,response,reference,Context Precision,Context Recall,Response Relevancy,Faithfulness
0,How should I respond to a fire in an industria...,"['{""accident_id"": 370, ""event_type"": ""Major Ac...","Evacuate the area, alert the fire brigade and ...","Immediately activate fire alarm, evacuate non-...",1.0,0.714286,0.85423,0.909091
1,What are signs of an imminent explosion risk i...,"['{""accident_id"": 1097, ""event_type"": ""Major A...","Based on the provided context, signs of an imm...","Abnormal pressure readings, unusual temperatur...",0.333333,0.625,0.994755,0.333333
2,I'm seeing sludge overflow in my refinery's bi...,"['{""accident_id"": 1016, ""event_type"": ""Major A...",The operator is considering several possible e...,Likely filamentous bacteria growth caused by: ...,1.0,1.0,0.824178,1.0
3,What immediate actions should be taken if toxi...,"['{""accident_id"": 1335, ""event_type"": ""Near Mi...",Immediate actions include: \n1. Alerting perso...,"Activate emergency alarms, evacuate personnel ...",1.0,1.0,0.915913,1.0
4,How do I safely handle a chemical spill in a p...,"['{""accident_id"": 370, ""event_type"": ""Major Ac...","To safely handle a chemical spill, evacuate th...",Identify spilled substance from safety data sh...,0.583333,0.375,0.929224,0.875
5,What safety measures are essential for oxygen ...,"['{""accident_id"": 1151, ""event_type"": ""Near Mi...",Automatic shutdown of the compressor and autom...,"Regular inspection of trapping sieves, tempera...",0.833333,0.0,0.994842,1.0
6,How should I respond to a pressure vessel show...,"['{""accident_id"": 244, ""event_type"": ""Major Ac...",Immediately evacuate the area and follow emerg...,"Evacuate area immediately, activate emergency ...",1.0,0.428571,0.844943,0.8
7,What are warning signs of a runaway chemical r...,"['{""accident_id"": 26, ""event_type"": ""Major Acc...",Warning signs of a runaway chemical reaction i...,"Unexpected temperature increase, unusual color...",0.833333,0.5,0.996268,1.0
8,What precautions are needed when handling sodi...,"['{""accident_id"": 742, ""event_type"": ""Major Ac...",Precautions needed when handling sodium dichlo...,"Keep away from moisture and heat above 40°C, a...",1.0,0.666667,0.996238,0.714286
9,How do I respond to a leak in a gas storage fa...,"['{""accident_id"": 398, ""event_type"": ""Major Ac...",To respond to a leak in a gas storage facility...,"Activate emergency shutdown systems, evacuate ...",1.0,0.5,0.970604,0.833333


In [46]:
# Report the average of every metric column over all evaluated samples.
for metric_column in ("Context Precision", "Context Recall", "Response Relevancy", "Faithfulness"):
    print(f"Mean {metric_column}:", df[metric_column].mean())

Mean Context Precision: 0.8298611110671875
Mean Context Recall: 0.4895833333333333
Mean Response Relevancy: 0.9005365625649825
Mean Faithfulness: 0.7374969937469938
