In [2]:
# Install the runtime dependencies for the retrieval-augmented chatbot defined below.
# NOTE(review): no versions are pinned, so a fresh run may resolve different releases
# than the ones this notebook was executed with — consider pinning.
# NOTE(review): `xformer` may be a typo for `xformers` — confirm which package is intended.
%pip install xformer --quiet
%pip install langchain --quiet
%pip install accelerate --quiet
%pip install transformers --quiet
%pip install bitsandbytes --quiet
%pip install unstructured --quiet
%pip install sentence-transformers --quiet
# This is the only install without --quiet, so its output is shown in full.
%pip install -U faiss-gpu tiktoken
%pip install langchainhub --quiet




In [36]:
import random
import torch
import os
from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter, TokenTextSplitter, CharacterTextSplitter
from langchain_community.vectorstores import FAISS
import pandas as pd
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
import torch
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain import hub
from IPython.display import Markdown, display



In [7]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation notices raised by the
# libraries this notebook imports.
import warnings
warnings.filterwarnings('ignore')

In [8]:
# Mount Google Drive (Colab only) and switch the working directory to MyDrive,
# so relative paths used later (e.g. the "faiss_indices" folder) resolve against it.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive


In [52]:
class MedicalChatbot:
    """Retrieval-augmented question-answering chatbot backed by a FAISS index.

    The constructor only records configuration and picks a torch device; no
    model is loaded until the ``init_*`` methods (or ``init_all``) are called.
    The ``init_*`` methods depend on each other's attributes and must be run
    in the order used by ``init_all``.
    """

    def __init__(self, embedding_model_name='thenlper/gte-base', llm_model_name="mistralai/Mistral-7B-Instruct-v0.1", faiss_indices_path="../data/faiss_indices"):
        """Store configuration and select the compute device.

        Args:
            embedding_model_name: Hugging Face id of the sentence-embedding
                model; also used as the sub-directory of the FAISS index.
            llm_model_name: Hugging Face id of the causal language model.
            faiss_indices_path: Directory that contains the saved FAISS indices.
        """
        self.embedding_model_name = embedding_model_name
        self.llm_model_name = llm_model_name
        self.faiss_indices_path = faiss_indices_path
        self.device = self._select_device()
        # Heavy components are intentionally NOT built here; call init_all()
        # or the individual init_* methods when they are needed.

    @staticmethod
    def _select_device():
        """Return "cuda", "mps" or "cpu", preferring the first usable one."""
        if torch.cuda.is_available():
            return "cuda"
        # is_available() (not is_built()) is the right check: is_built() only
        # reports that this torch binary was compiled with MPS support, not
        # that an MPS device can actually be used on this machine.
        mps_backend = getattr(torch.backends, "mps", None)
        if mps_backend is not None and mps_backend.is_available():
            return "mps"
        return "cpu"

    def generate_response(self, user_query):
        """Run the retrieval-QA chain for ``user_query``.

        Returns:
            The chain's output dict. With the configuration in
            ``init_qa_chain`` it carries the keys "query", "result" and
            "source_documents".
        """
        # invoke() is the non-deprecated equivalent of calling the chain
        # directly and matches how the retriever is queried in this class.
        return self.qa_chain.invoke({"query": user_query})

    def init_embedding_model(self):
        """Load the embedding model on ``self.device`` with normalized embeddings."""
        model_kwargs = {'device': self.device}
        encode_kwargs = {'normalize_embeddings': True}
        self.embed_model = HuggingFaceEmbeddings(
            model_name=self.embedding_model_name,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs,
        )

    def load_FAISS_DB_retriever(self):
        """Load the FAISS index for the embedding model and build a top-3 retriever.

        Requires ``init_embedding_model`` to have been called first.
        """
        # The index lives at <faiss_indices_path>/<embedding_model_name>; a model
        # id such as "org/name" therefore maps to a nested directory.
        # NOTE(review): FAISS.load_local deserializes pickled data — only load
        # indices you created or trust. Newer langchain-community releases
        # presumably require allow_dangerous_deserialization=True here; confirm
        # against the installed version before upgrading.
        self.faiss_db = FAISS.load_local(f"{self.faiss_indices_path}/{self.embedding_model_name}", self.embed_model)
        self.faiss_retriever = self.faiss_db.as_retriever(search_type="similarity", search_kwargs={"k": 3})

    def get_relevant_documents_for_query(self, query):
        """Return the documents the retriever finds for ``query`` (no LLM involved)."""
        return self.faiss_retriever.invoke(query)

    def init_model(self):
        """Load the causal LM with 4-bit NF4 quantization (double quant, fp16 compute)."""
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True
        )

        # NOTE(review): trust_remote_code=True executes Python code shipped in
        # the model repository — only use it with repositories you trust.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.llm_model_name, torch_dtype=torch.float16,
            trust_remote_code=True,
            device_map="auto",
            quantization_config=quantization_config
        )

    def init_tokenizer(self):
        """Load the fast tokenizer that matches ``llm_model_name``."""
        self.tokenizer = AutoTokenizer.from_pretrained(self.llm_model_name, use_fast=True)
        # self.tokenizer.pad_token = self.tokenizer.eos_token

    def init_pipeline(self):
        """Build the text-generation pipeline from the loaded model and tokenizer.

        Requires ``init_model`` and ``init_tokenizer`` to have been called first.
        """
        generation_config = GenerationConfig.from_pretrained(self.llm_model_name)
        generation_config.max_new_tokens = 512
        # Sampling stays enabled, but with a temperature this close to zero.
        generation_config.temperature = 0.001
        generation_config.top_p = 0.95
        generation_config.do_sample = True
        generation_config.repetition_penalty = 1.15

        self.pipeline = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            return_full_text=True,
            generation_config=generation_config,
        )

    def init_llm(self):
        """Wrap the transformers pipeline as a LangChain LLM."""
        self.llm = HuggingFacePipeline(
            pipeline=self.pipeline,
        )

    def init_prompt_template(self):
        """Pull the "rlm/rag-prompt" prompt from the LangChain hub.

        A hand-written prompt template could be substituted here; the shared
        hub prompt is what this class uses.
        """
        self.prompt = hub.pull("rlm/rag-prompt", api_url="https://api.hub.langchain.com")

    def init_qa_chain(self):
        """Assemble the "stuff" RetrievalQA chain from LLM, retriever and prompt.

        Requires ``init_llm``, ``load_FAISS_DB_retriever`` and
        ``init_prompt_template`` to have been called first.
        """
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.faiss_retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": self.prompt},
        )

    def init_all(self):
        """Run every initialization step in dependency order."""
        self.init_embedding_model()
        self.load_FAISS_DB_retriever()
        self.init_tokenizer()
        self.init_model()
        self.init_pipeline()
        self.init_llm()
        self.init_prompt_template()
        self.init_qa_chain()

In [10]:
# Override the default LLM with microsoft/phi-2 and point at the "faiss_indices"
# folder, given relative to the current working directory.
chatbot = MedicalChatbot(llm_model_name="microsoft/phi-2", faiss_indices_path="faiss_indices")

In [11]:
# Show which torch device the chatbot selected at construction time.
chatbot.device

'cuda'

In [12]:
# Load the embedding model; the FAISS index loader needs it.
chatbot.init_embedding_model()

In [13]:
# Load the saved FAISS index and build the similarity retriever (k=3).
chatbot.load_FAISS_DB_retriever()

In [14]:
# Sanity-check retrieval on its own, before any LLM is loaded.
# The retriever is configured with k=3, so three documents are expected.
question = "What are the effects of α1-antitrypsin (AAT) treatment on chronic fatigue syndrome (CFS) based on a case study involving a 49-year-old woman?"
retrieved = chatbot.get_relevant_documents_for_query(question)
len(retrieved)


3

In [15]:
# Load the tokenizer that belongs to the configured LLM.
chatbot.init_tokenizer()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [16]:
# Load the LLM weights with 4-bit quantization.
chatbot.init_model()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [17]:
# Combine the loaded model and tokenizer into a text-generation pipeline.
chatbot.init_pipeline()

In [18]:
# Wrap the transformers pipeline so LangChain can use it as an LLM.
chatbot.init_llm()

In [19]:
# Pull the RAG prompt from the LangChain hub.
chatbot.init_prompt_template()

In [20]:
# Assemble the RetrievalQA chain from the LLM, the retriever and the prompt.
chatbot.init_qa_chain()

In [43]:
# Ask the full RAG chain a question; the returned dict holds the query,
# the generated answer and the source documents.
query = "Is occupational outcome in bipolar disorder predicted by premorbid functioning and intelligence?"
result = chatbot.generate_response(query)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [44]:
# Render the question in bold, followed by the generated answer.
# Reads `result` (assigned by the preceding query cell); the former `result_`
# is never defined in this notebook and raised NameError on a fresh kernel.
display(Markdown(f"<b>{result['query']}</b>"))
display(Markdown(f"<p>{result['result']}</p>"))

<b>Is occupational outcome in bipolar disorder predicted by premorbid functioning and intelligence?</b>

<p> No, occupational outcome in bipolar disorder is not predicted by premorbid functioning and intelligence.</p>

In [45]:
# Run the full RAG chain on a second question.
query = "What are the effects of α1-antitrypsin (AAT) treatment on chronic fatigue syndrome (CFS) based on a case study involving a 49-year-old woman?"
result = chatbot.generate_response(query)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [47]:
# Render the question in bold, followed by the generated answer.
display(
    Markdown(f"<b>{result['query']}</b>"),
    Markdown(f"<p>{result['result']}</p>"),
)

<b>What are the effects of α1-antitrypsin (AAT) treatment on chronic fatigue syndrome (CFS) based on a case study involving a 49-year-old woman?</b>

<p> According to the provided context, there is no direct information about the effects of α1-antitrypsin (AAT) treatment on chronic fatigue syndrome (CFS). Therefore, it cannot be determined whether AAT treatment can alleviate the symptoms of CFS based solely on this context. Further research would be needed to determine the effectiveness of AAT treatment on CFS.
</p>

In [49]:
# Inspect the source documents returned alongside the answer.
result['source_documents']

[Document(page_content='Abstract: SUMMARY Several lines of evidence support the involvement of inflammatory and immunologic abnormalities in chronic fatigue syndrome CFS Since recent studies have shown that α1 antitrypsin AAT possesses antiinflammatory properties the potential therapeutic effect of AAT treatment on CFS has been investigated A 49yearold woman diagnosed with CFS was treated with intravenous infusions of a human plasmaderived AAT concentrate 60 mgkg body weight weekly for 8 consecutive weeks The patients monocyte elastase a regulator of inflammatory processes was 1170 Umg At completion of treatment improvement in maximal workload was observed 540717 of predicted Additionally amelioration in working memory scores 8394 and perceptual organization scores 7583 were detected on the Wechsler Adult Intelligence ScaleIII test Monocyte elastase decreased to a normal range 150 Umg Improvement in functional capacity allowed the patient to work in parttime employment These findings s