# Installing required packages

In [37]:
!pip install langchain --upgrade langchain_community langchain-cohere cohere pypdf requests chromadb sentence_transformers gradio



In [38]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, SentenceTransformersTokenTextSplitter
import numpy as np
from pypdf import PdfReader
from tqdm import tqdm

def word_wrap(string, n_chars=72):
  """Wrap `string` into lines shorter than or equal to `n_chars` characters.

  Each line is broken at the last space found inside the current `n_chars`
  window; that space is replaced by the newline. When the window contains no
  space at all, the line is hard-broken at `n_chars` without losing characters.

  Args:
    string: Text to wrap.
    n_chars: Window width used to look for a break point (must be >= 1).

  Returns:
    The wrapped text, with lines joined by '\\n'.

  Raises:
    ValueError: If `n_chars` is smaller than 1.
  """
  if n_chars < 1:
    raise ValueError("n_chars must be at least 1")

  lines = []
  remaining = string
  # Iterative on purpose: one recursion level per output line overflows the
  # interpreter's recursion limit on long documents.
  while len(remaining) >= n_chars:
    window = remaining[:n_chars]
    head = window.rsplit(' ', 1)[0]
    # Skip the space that served as the break point; if there was none,
    # consume exactly the window so no character is dropped.
    consumed = len(head) + 1 if ' ' in window else len(head)
    lines.append(head)
    remaining = remaining[consumed:]

  lines.append(remaining)
  return '\n'.join(lines)

### Importing the PDF

In [39]:
from pypdf import PdfReader

# NOTE(review): the path looks like a Colab Google Drive mount, but no mount call
# is visible in this notebook — confirm Drive is mounted before running.
reader = PdfReader("/content/drive/MyDrive/Encyclopedia of Medicine.pdf")

# Extract and trim the text of every page, keeping only pages that are non-empty.
pdf_texts = list(filter(None, (page.extract_text().strip() for page in reader.pages)))


KeyboardInterrupt: 

### Splitting the document Recursively

In [None]:
# First pass: split on progressively finer separators (paragraph, line,
# sentence, word, character) with chunk_size=1000 and no overlap.
character_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
    separators=["\n\n", "\n", ". ", " ", ""],
)

# Pages are joined with a blank line so the whole document is split in one call.
character_split_texts = character_splitter.split_text("\n\n".join(pdf_texts))
print(len(character_split_texts))

In [None]:
# Second pass: re-split every character chunk with tokens_per_chunk=256, no overlap.
token_splitter = SentenceTransformersTokenTextSplitter(tokens_per_chunk=256, chunk_overlap=0)

# Flatten the per-chunk results into one list of token-bounded pieces.
token_split_texts = []
for text in character_split_texts:
  token_split_texts.extend(token_splitter.split_text(text))


In [None]:
import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction

# Built with its defaults (no model name is passed here); later handed to the
# Chroma collection as its `embedding_function`.
embedding_function = SentenceTransformerEmbeddingFunction()

In [None]:
chroma_client = chromadb.Client()
# get_or_create_collection keeps this cell re-runnable: create_collection raises
# when "info_medical" already exists in the client.
chroma_collection = chroma_client.get_or_create_collection("info_medical", embedding_function=embedding_function)

# One string id per chunk, in chunk order.
ids = [str(i) for i in range(len(token_split_texts))]

# Chroma rejects a single add() above its maximum batch size, so insert in slices.
# NOTE(review): 5000 is a conservative choice — confirm against the installed version's limit.
ADD_BATCH_SIZE = 5000
for start in range(0, len(ids), ADD_BATCH_SIZE):
  chroma_collection.add(
      ids=ids[start:start + ADD_BATCH_SIZE],
      documents=token_split_texts[start:start + ADD_BATCH_SIZE],
  )

chroma_collection.count()

In [None]:
# Smoke-test retrieval: fetch the 5 closest chunks for a sample question.
query = "What is Doppler ?"

results = chroma_collection.query(n_results=5, query_texts=[query])
# One list of documents is returned per query text; only one query was sent.
retrieved_documents = results["documents"][0]


In [None]:
import os
from getpass import getpass

import cohere

# Never hard-code the key in the notebook: read it from the environment and
# fall back to an interactive prompt whose input is not echoed or saved.
co = cohere.Client(
    os.environ.get("CO_API_KEY")
    or os.environ.get("COHERE_API_KEY")
    or getpass("Cohere API key: ")
)

In [None]:
def RAG(query, retrieved_documents, model ="command"):
  """Answer `query` with Cohere chat, grounded on the retrieved text chunks.

  The instructions are sent as the chat preamble and every retrieved chunk is
  sent as its own document, so only the chunks (not the instructions) are
  offered to the model as source material.

  Args:
    query: The user's question; sent as the chat message.
    retrieved_documents: Iterable of text chunks retrieved for the question.
    model: Name of the Cohere model to call.

  Returns:
    The `text` attribute of the chat response.
  """
  preamble = (
      "You are a helpful expert medical research assistant. "
      "Your users are asking questions about information contained in an encyclopedia of medicine. "
      "You will be shown the user's question and the relevant information from the encyclopedia of medicine. "
      "Answer the user's question using only this information."
  )

  # One string-to-string dict per chunk; the id identifies each excerpt.
  documents = [
      {"id": str(index), "text": chunk}
      for index, chunk in enumerate(retrieved_documents)
  ]

  # `co` is the module-level Cohere client created in an earlier cell.
  response = co.chat(
      model=model,
      message=query,
      preamble=preamble,
      documents=documents,
  )

  return response.text

In [None]:
import gradio as gr
def chatbot(query):
  """Gradio callback: retrieve the 5 closest chunks and answer from them.

  Args:
    query: The question typed by the user.

  Returns:
    A `(response, source_text)` pair. On failure the first element carries the
    error message and the second is empty, so the pair always has two values,
    one per output component of the interface.
  """
  try:
    results = chroma_collection.query(query_texts=[query], n_results=5)
    # One list of documents per query text; only one query is sent.
    retrieved_documents = results['documents'][0]

    response = RAG(query, retrieved_documents)
    source_text = "\n\n".join(retrieved_documents)

    return response, source_text
  except Exception as e:
    print("Error: ", e)
    # Return two values here as well; a single string does not match the two outputs.
    return f"Error: {e}", ""

# Two output boxes, matching the (response, source_text) pair returned by chatbot.
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(lines=2, placeholder="Ask a medical question.."),
    outputs=[
        gr.Textbox(label="Response", lines=4),
        gr.Textbox(label="Source Text", lines=10),
    ],
    title="Medical Research Assistant",
    description="Ask any medical-related question, and I will provide answers based on the relevant information.",
)

iface.launch()