In [1]:
%pip install langchain --quiet
%pip install accelerate --quiet
%pip install transformers --quiet
%pip install bitsandbytes --quiet
%pip install unstructured --quiet
%pip install sentence-transformers --quiet
%pip install -U faiss-gpu tiktoken
%pip install langchainhub --quiet
%pip install pinecone-client --quiet
%pip install langchain_pinecone --quiet
%pip install rank_bm25 --quiet
%pip install python-dotenv --quiet
%pip install ragas --quiet



In [3]:
from dotenv import load_dotenv, find_dotenv
import os
import pickle
import time
import warnings
import yaml

import pandas as pd
import pinecone
import torch
from IPython.display import Markdown, display
from sentence_transformers import SentenceTransformer
from transformers import (
    AutoModel,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    GenerationConfig,
    pipeline,
)

from langchain import HuggingFacePipeline, hub
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.text_splitter import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
    TokenTextSplitter,
)
# from langchain.vectorstores import Pinecone
from langchain_community.vectorstores import Pinecone
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain_community.llms import GPT4All
from langchain.chains import ConversationalRetrievalChain
from langchain.retrievers.multi_query import MultiQueryRetriever
from typing import List
from langchain.chains import LLMChain
from pydantic.v1 import BaseModel, Field
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.chains import TransformChain
from langchain.chains import SequentialChain
import logging

from ragas import evaluate
from ragas.testset.generator import TestsetGenerator
from ragas.testset.extractor import KeyphraseExtractor
# default DocumentStore
from ragas.testset.docstore import InMemoryDocumentStore
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

In [4]:
# Suppress warnings
warnings.filterwarnings("ignore")

In [5]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/nlp_chatbot_folder

Mounted at /content/drive
/content/drive/MyDrive/nlp_chatbot_folder


In [7]:
with open ("cfg.yaml", "r") as file:
  cfg = yaml.safe_load(file)

In [8]:
cfg

{'retrievers': {'bm25': {'topk': 2, 'path': 'chunked_docs.pkl'},
  'faiss': {'faiss_index_path': 'faiss_indices/',
   'search_type': 'similarity_score_threshold',
   'score_threshold': 0.77,
   'topk': 2}},
 'ensemble': {'weights': [0.4, 0.6]},
 'llm_model': {'name': 'mistralai/Mistral-7B-Instruct-v0.1',
  'local_path': '',
  'temperature': 0.001,
  'top_p': 0.95,
  'repetition_penalty': 1.15},
 'embedding_model': 'thenlper/gte-base',
 'use_qa_chain': True,
 'use_conversational_qa_chain': True,
 'use_multi_query_qa_chain': True,
 'conversational_chain': {'conversation_depth': 2}}

In [6]:
from custom_chatbot import MedicalChatbot

In [9]:
chatbot = MedicalChatbot(cfg)

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/68.1k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/618 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/219M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [11]:
from langchain_community.document_loaders import DataFrameLoader

df1 = pd.read_csv("data/processed_data_part1.csv")
df2 = pd.read_csv("data/processed_data_part2.csv")
df = pd.concat([df1, df2], ignore_index=True)
df.drop("Abstract_Length", axis=1, inplace=True)
loader = DataFrameLoader(df, page_content_column="Abstract")
docs_all = loader.load()

In [45]:
splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=0)
documents = splitter.split_documents(docs_all[:50])
docs = documents[:10]


In [46]:
llm = chatbot.get_llm()
embedding_model = chatbot.load_embedding_model()

In [47]:
langchain_llm = LangchainLLMWrapper(llm)
langchain_embeddings = LangchainEmbeddingsWrapper(embedding_model)

In [48]:
ragas_generator_llm_model = langchain_llm
ragas_critic_llm_model = langchain_llm
ragas_embeddings_model = langchain_embeddings

In [50]:
from ragas.testset.generator import TestsetGenerator

ragas_generator = TestsetGenerator(generator_llm=ragas_generator_llm_model,
                             critic_llm=ragas_critic_llm_model,
                             embeddings=ragas_embeddings_model,
                             docstore=ragas_docstore)

In [52]:
testset = ragas_generator.generate_with_langchain_docs(docs[:2], test_size=1)


embedding nodes:   0%|          | 0/4 [00:00<?, ?it/s]

Exception in thread Thread-20:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 75, in run
    results = self.loop.run_until_complete(self._aresults())
  File "/usr/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
    return future.result()
  File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 63, in _aresults
    raise e
  File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 58, in _aresults
    r = await future
  File "/usr/lib/python3.10/asyncio/tasks.py", line 571, in _wait_for_one
    return f.result()  # May raise f.exception().
  File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/ragas/testset/extractor.py", line 49, in extract

ExceptionInRunner: The runner thread which was running the jobs raised an exeception. Read the traceback above to debug it. You can also pass `raise_exceptions=False` incase you want to show only a warning message instead.

In [54]:
from datasets import load_dataset

# loading the V2 dataset
amnesty_qa = load_dataset("explodinggradients/amnesty_qa", "english_v2")
amnesty_qa


Downloading builder script:   0%|          | 0.00/5.72k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/1.90k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

Generating eval split: 0 examples [00:00, ? examples/s]

DatasetDict({
    eval: Dataset({
        features: ['question', 'ground_truth', 'answer', 'contexts'],
        num_rows: 20
    })
})

In [55]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)


In [58]:
from ragas import evaluate

result = evaluate(
    amnesty_qa["eval"],
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    ],
    llm=langchain_llm,
    embeddings=langchain_embeddings
)

result


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Evaluating:   0%|          | 0/80 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for o

ExceptionInRunner: The runner thread which was running the jobs raised an exeception. Read the traceback above to debug it. You can also pass `raise_exceptions=False` incase you want to show only a warning message instead.

In [56]:
df = result.to_pandas()
df.head()


NameError: name 'result' is not defined