# Chatbot for Medical Journal Data

In [None]:
!pip install -q langchain
!pip install -q transformers

!pip install openai
!pip install python-dotenv

!pip install chromadb
!pip install tiktoken

Importing necessary libraries

In [None]:
from openai import OpenAI
import os
import openai
from openai import OpenAI
import sys
sys.path.append('../..')

from google.colab import userdata
# Read the OpenAI key from Colab's user secrets and export it through the
# process environment under the same variable name.
api_key = userdata.get('OPENAI_API_KEY')
os.environ.update({"OPENAI_API_KEY": api_key})

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores import Chroma

In [None]:
# Load the data downloaded from Kaggle in CSV format.
# `source_column` selects the column copied into each Document's
# metadata["source"]. Use the compact `uuid` identifier instead of `abstract`:
# the abstract text is already part of page_content, and using it as the source
# duplicates the full text into the metadata of every document and chunk.

loader = CSVLoader(file_path="/content/train.csv", source_column="uuid")
pages = loader.load()

In [None]:
# Keep the first loaded document for inspection in the following cells
page = pages[0]

In [None]:
# Print the string form of the whole Document object
print(page)

page_content='uuid: 0\ntitle: Accessible Visual Artworks for Blind and Visually Impaired People: Comparing a Multimodal Approach with Tactile Graphics\nauthor: Quero, Luis Cavazos; Bartolome, Jorge Iranzo; Cho, Jundong\nabstract: Despite the use of tactile graphics and audio guides, blind and visually impaired people still face challenges to experience and understand visual artworks independently at art exhibitions. Art museums and other art places are increasingly exploring the use of interactive guides to make their collections more accessible. In this work, we describe our approach to an interactive multimodal guide prototype that uses audio and tactile modalities to improve the autonomous access to information and experience of visual artworks. The prototype is composed of a touch-sensitive 2.5D artwork relief model that can be freely explored by touch. Users can access localized verbal descriptions and audio by performing touch gestures on the surface while listening to themed bac

In [None]:
# Preview only the first 500 characters of the document text
print(page.page_content[:500])

uuid: 0
title: Accessible Visual Artworks for Blind and Visually Impaired People: Comparing a Multimodal Approach with Tactile Graphics
author: Quero, Luis Cavazos; Bartolome, Jorge Iranzo; Cho, Jundong
abstract: Despite the use of tactile graphics and audio guides, blind and visually impaired people still face challenges to experience and understand visual artworks independently at art exhibitions. Art museums and other art places are increasingly exploring the use of interactive guides to make


In [None]:
# Metadata dict attached to the document (contains the 'source' entry)
page.metadata

{'source': 'Despite the use of tactile graphics and audio guides, blind and visually impaired people still face challenges to experience and understand visual artworks independently at art exhibitions. Art museums and other art places are increasingly exploring the use of interactive guides to make their collections more accessible. In this work, we describe our approach to an interactive multimodal guide prototype that uses audio and tactile modalities to improve the autonomous access to information and experience of visual artworks. The prototype is composed of a touch-sensitive 2.5D artwork relief model that can be freely explored by touch. Users can access localized verbal descriptions and audio by performing touch gestures on the surface while listening to themed background music along. We present the design requirements derived from a formative study realized with the help of eight blind and visually impaired participants, art museum and gallery staff, and artists. We extended th

# Splitting the data file into multiple chunks

In [None]:
# Split using recursive splitter

from langchain.text_splitter import RecursiveCharacterTextSplitter
# Chunking parameters, named so they are easy to find and tune.
CHUNK_SIZE = 1000     # passed to the splitter as chunk_size
CHUNK_OVERLAP = 150   # passed to the splitter as chunk_overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

In [None]:
# Split every loaded document into chunks with the configured splitter
splits = text_splitter.split_documents(pages)

In [None]:
# Number of chunks produced by the split
len(splits)

22192

In [None]:
import openai
from packaging import version

# Minimum openai release this notebook expects. Change it here only: the error
# message below is built from this value, so the two cannot drift apart.
required_version = version.parse("1.1.1")
current_version = version.parse(openai.__version__)

if current_version < required_version:
    raise ValueError(f"Error: OpenAI version {openai.__version__}"
                     f" is less than the required version {required_version}")
else:
    print("OpenAI version is compatible.")

# -- Now we can get to it
from openai import OpenAI

print('OPENAI WAS GREAT AGAIN')

OpenAI version is compatible.
OPENAI WAS GREAT AGAIN


# Create vectors from embeddings using Chroma

In [None]:
# Directory later handed to Chroma as persist_directory
persist_directory = 'docs/chroma/' # store the embeddings in this directory for future use

# OpenAI embeddings wrapper, created with its default settings
embedding = OpenAIEmbeddings()

  warn_deprecated(


In [None]:
# Build the Chroma index once and reuse it afterwards. Re-running
# Chroma.from_documents against an already populated persist_directory embeds
# every chunk again and adds them to the stored collection a second time.
# Delete the directory to force a rebuild (e.g. after changing the splitter).
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # An index is already on disk: open it with the same embedding function.
    vectordb = Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding
    )
else:
    vectordb = Chroma.from_documents(
        documents=splits,
        embedding=embedding,
        persist_directory=persist_directory
    )

In [None]:
# Sanity check: number of entries held by the underlying Chroma collection
print(vectordb._collection.count())

22192


In [None]:
# Sample author-based query used for the similarity search below
question = "what are the abstracts of Quero, Luis Cavazos?"

In [None]:
# Maximum marginal relevance search: fetch 5 candidate documents, then keep
# the 3 most diverse of them.
docs_mmr = vectordb.max_marginal_relevance_search(
    question,
    k=3,
    fetch_k=5,
)

In [None]:
# Number of documents returned by the MMR search. `docs` is never defined in
# this notebook, so `len(docs)` only worked on leftover kernel state.
len(docs_mmr)

70

In [None]:
# Text of the first MMR result
docs_mmr[0].page_content

'uuid: 5027\ntitle: The soundscapes of Baja California Sur: Preliminary results from the Canon de Santa Teresa rock art landscape\nauthor: Diaz-Andreu, Margarita; Gutierrez Martinez, Maria de la Luz; Mattioli, Tommaso; Picas, Mathieu; Villalobos, Cesar; Zubieta, Leslie F.'

In [None]:
# Text of the second MMR result
docs_mmr[1].page_content

'Noel Badano,María Inés Pérez Millán,María Florencia Quiroga,Patricia Baré,Itatí Ibañez,Roberto Pozner,Mercedes Borge,Guillermo Docena,Liliana Bezrodnik,María Belén Almejun'

# Compressing the retrieved content

In [None]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.llms import OpenAIChat

def pretty_print_docs(docs):
    """Print each document's text under a numbered header.

    Documents are numbered from 1 and separated by a rule of 100 dashes.
    Each item in `docs` must expose its text as `page_content`.
    """
    rule = "\n" + "-" * 100 + "\n"
    sections = []
    for number, doc in enumerate(docs, start=1):
        sections.append("Document " + str(number) + ":\n\n" + doc.page_content)
    print(rule.join(sections))


In [None]:
# Wrap our vectorstore in a retriever whose hits are post-processed by an
# LLM-based extractor before being returned.

from langchain.chat_models import ChatOpenAI
# Use the maintained "gpt-3.5-turbo" alias (the same model the QA chains in this
# notebook use) instead of the dated "-0613" snapshot, which OpenAI has retired.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo", max_tokens=1000)

# Extractor that runs the LLM over each retrieved document
compressor = LLMChainExtractor.from_llm(llm)


# Base retrieval uses MMR search; the compressor is applied on top of it
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(search_type = "mmr")
)

  warn_deprecated(


In [None]:
# Run the compression retriever on a new query and print what it returns
question = "what did they say about ART?"
compressed_docs = compression_retriever.get_relevant_documents(question)
pretty_print_docs(compressed_docs)



Document 1:

satisfaction with the art appreciation activity directly affected their interest in art. The correlation reached 0.78. Satisfaction was strongly correlated with psychological expectations (0.67) and art information obtained in the early stage (0.61).
----------------------------------------------------------------------------------------------------
Document 2:

ART and patients with long ART are vulnerable. More attention should be paid to weather and ART, and these findings may have implications for effective policies to reduce ART to protect public health.
----------------------------------------------------------------------------------------------------
Document 3:

art psychotherapy could be an effective add-on strategy for the treatment of moderate-to-severe MDD
----------------------------------------------------------------------------------------------------
Document 4:

Art may be made as a guide to understanding sense of place, and also as a pathway to understa

# Retrieval

In [None]:
from langchain.chains import RetrievalQA

In [None]:
from langchain.chat_models import ChatOpenAI
# Chat model used by the QA chains below; temperature 0 is passed explicitly
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [None]:
# Plain RetrievalQA chain: the chat model answers from documents fetched by the
# vector store's default retriever.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectordb.as_retriever())

In [None]:
# Ask the RetrievalQA chain a question.
# NOTE(review): `result` is not displayed in this cell and is reassigned by later cells.
question = "what did they say about ART?"

result = qa_chain({"query": question})

In [None]:
from langchain.prompts import PromptTemplate

# Build prompt
# The template has two placeholders, {context} and {question}. It also limits
# the answer to three sentences and asks for a fixed closing phrase.

template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum.
Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""

# Turn the raw template string into a PromptTemplate object
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)



In [None]:
# Rebuild the QA chain with the custom prompt, and have it return the
# retrieved source documents alongside the answer.

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    retriever=vectordb.as_retriever(),
)

In [None]:
# Query the prompt-customised chain; the result dict is inspected in the next cells
question = "is Laparoscopic the only way to treat Endometriosis"
result = qa_chain({"query": question})

In [None]:
# Generated answer text
result['result']

'Laparoscopic surgery was originally considered the gold standard in the treatment of endometriosis-related infertility, but recent advancements in assisted reproductive technology (ART) have also proven to be effective. The combined approach of surgery followed by ART has shown higher chances of pregnancy in infertile women with endometriosis. Thanks for asking!'

In [None]:
# First source document returned with the answer
result["source_documents"][0]

Document(page_content='abstract: Laparoscopic surgery was originally considered the gold standard in the treatment of endometriosis-related infertility. Assisted reproductive technology (ART) was indicated as second-line treatment or in the case of male factor. The combined approach of surgery followed by ART proved to offer higher chances of pregnancy in infertile women with endometriosis. However, it was highlighted how pelvic surgery for endometriosis, especially in cases of ovarian endometriomas, could cause iatrogenic damage due to ovarian reserve loss, adhesion formation (scarring), and ischemic damage. Furthermore, in the last few years, the trend to delay the first childbirth, recent technological advances in ultrasound diagnosis, and technological progress in clinical and laboratory aspects of ART have certainly influenced the approach to infertility and endometriosis with, ART assuming a more relevant role. Management of endometriosis should take into account that the disease

Retrieval with a different chain type: Refine



In [None]:
# Answer the current `question` again, this time with the "refine" chain type.
# NOTE(review): the `_mr` suffix suggests map-reduce, but the chain built here is "refine".
qa_chain_mr = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=vectordb.as_retriever(),
)
result = qa_chain_mr({"query": question})
result["result"]

"Laparoscopic surgery has been traditionally considered the gold standard in the treatment of endometriosis, especially for cases where surgical intervention is necessary. However, there are other treatment options available for managing endometriosis, depending on the severity of the condition and the patient's symptoms. These may include medication therapy, hormone therapy, lifestyle changes, and in some cases, assisted reproductive technology (ART) for infertility related to endometriosis. It is important for patients to discuss with their healthcare provider to determine the most appropriate treatment plan for their individual situation. Additionally, robotic assistance in gynecological surgeries, including those for endometriosis, has shown benefits in more complicated operations, with lower conversion rates to open procedures and improved suturing capabilities compared to traditional laparoscopy. As technology advances and cost considerations are addressed, robotic surgery may be

# Using Memory to Retrieve

In [None]:
from langchain.memory import ConversationBufferMemory
# Conversation buffer exposed under the "chat_history" key, with return_messages enabled
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

In [None]:
# Conversational retrieval chain: combines the chat model, the vector store
# retriever and the conversation memory defined earlier.

from langchain.chains import ConversationalRetrievalChain
retriever = vectordb.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [None]:
# First turn of the conversation. The spelling of "endometriosis" is corrected
# so the query matches the term as it appears in the indexed abstracts.
question = "what is endometriosis"
result = qa({"question": question})

In [None]:
# Answer to the first conversational turn
result['answer']

'Endometriosis is a medical condition where tissue similar to the lining inside the uterus, known as the endometrium, starts to grow outside the uterus. This can lead to pain, inflammation, and sometimes fertility issues.'

In [None]:
# Second turn, sent through the same memory-backed chain
question = "is Laparoscopic the only way to treat Endometriosis"
result = qa({"question": question})

In [None]:
# Answer to the second conversational turn
result['answer']

'Laparoscopic surgery has been considered the gold standard in the treatment of endometriosis-related infertility. However, in recent years, assisted reproductive technology (ART) has also played a significant role in the management of endometriosis. The combined approach of surgery followed by ART has shown to offer higher chances of pregnancy in infertile women with endometriosis. Additionally, advancements in ultrasound diagnosis and ART have influenced the approach to infertility and endometriosis, with ART assuming a more relevant role. So, while laparoscopic surgery is a common treatment option, it is not the only way to treat endometriosis.'