In [26]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from os import listdir
from os.path import isfile, join

# some comment

embeddings = HuggingFaceEmbeddings(
    model_name = 'emilyalsentzer/Bio_ClinicalBERT'
)
 
text = "This is a test document."
query_result = embeddings.embed_query(text)

onlyfiles = [f for f in listdir('data') if isfile(join('data', f))]
onlyfiles = ['0.txt']
raw_documents = []
for file in onlyfiles:
    print(file)
    raw_doc = TextLoader(f'data/{file}').load()
    raw_documents.extend(raw_doc)

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)


documents = text_splitter.split_documents(raw_documents)
db = Chroma.from_documents(documents, embeddings)

No sentence-transformers model found with name /Users/kawsarnoor/.cache/torch/sentence_transformers/emilyalsentzer_Bio_ClinicalBERT. Creating a new one with MEAN pooling.


0.txt


In [27]:
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")

#google = google/flan-t5-small
#Intel/dynamic_tinybert

model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small",
                                              device_map='auto',
                                            #   torch_dtype=torch.float16,
                                               low_cpu_mem_usage=True,
                                            
                                              )

In [28]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
import torch

pipe = pipeline(
    "text2text-generation",
    model=model, 
    tokenizer=tokenizer, 
    max_length=512,
    temperature=0,
    top_p=0.95,
    repetition_penalty=1.15
)

local_llm = HuggingFacePipeline(pipeline=pipe)

In [29]:
retriever = db.as_retriever(search_kwargs={"k": 3})

In [30]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(llm=local_llm, 
                                  chain_type="stuff", 
                                  retriever=retriever, 
                                  return_source_documents=True)

In [31]:
import textwrap

def wrap_text_preserve_newlines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)

    return wrapped_text

def process_llm_response(llm_response):
    print(wrap_text_preserve_newlines(llm_response['result']))
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])

In [38]:
# full example
query = "what are the issues with the patients operation?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2


Weight regain


Sources:
data/0.txt
data/0.txt


In [2]:
# PROMPT template

In [1]:
from operator import itemgetter

from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

In [11]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline.from_model_id(
    model_id="bigscience/bloom-1b7",
    task="text-generation",
    model_kwargs={"temperature": 0, "max_length": 64},
)



In [22]:
text = '''
I review this patient in my Telephone clinic following your letter. She had a gastric bypass under us back in 2007 with a good outcome. The main problem is weight regain and I am very pleased to hear that after being prescribed orlistat she managed to reduce weight from 114 kg to 96. Unfortunately the prescription run out and then now her weight is up to 102Kg. This is still a good outcome considering her original weight was 140kg but I understand the patient is unable to exercise due to back pain and is keen to go back to a lower weight. I am not sure if represcribing orlistat is a viable option and I suggested the patient to discuss with you if she could be prescribed liraglutide injections as they are usually helping with weight loss after surgery. Unfortunately we are not in a position to prescribe these drugs unless the patient is diabetic.   Overall there are no issues with her  bypass and she is very compliant with multivitamins.
'''

vectorstore = FAISS.from_texts([text], embedding=embeddings)
retriever = vectorstore.as_retriever()

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

In [23]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()} 
    | prompt 
    | llm 
    | StrOutputParser()
)

In [25]:
chain.invoke("Has the patient been prescribed medication?")



'['