In [1]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from os import listdir
from os.path import isfile, join

# some comment

embeddings = HuggingFaceEmbeddings(
    model_name = 'emilyalsentzer/Bio_ClinicalBERT'
)
 
text = "This is a test document."
query_result = embeddings.embed_query(text)




  from .autonotebook import tqdm as notebook_tqdm
No sentence-transformers model found with name emilyalsentzer/Bio_ClinicalBERT. Creating a new one with MEAN pooling.


In [2]:

onlyfiles = [f for f in listdir('discharge') if isfile(join('discharge', f))]

raw_documents = []
for file in onlyfiles:
    print(file)
    raw_doc = TextLoader(f'discharge/{file}').load()
    raw_documents.extend(raw_doc)

text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=20)


text_chunk_18.txt
text_chunk_24.txt
text_chunk_25.txt
text_chunk_19.txt
text_chunk_21.txt
text_chunk_20.txt
text_chunk_22.txt
text_chunk_23.txt
text_chunk_1.txt
text_chunk_3.txt
text_chunk_2.txt
text_chunk_6.txt
text_chunk_7.txt
text_chunk_5.txt
text_chunk_4.txt
text_chunk_9.txt
text_chunk_8.txt
text_chunk_12.txt
text_chunk_13.txt
text_chunk_11.txt
text_chunk_10.txt
text_chunk_14.txt
text_chunk_15.txt
text_chunk_17.txt
text_chunk_16.txt


In [3]:
documents = text_splitter.split_documents(raw_documents)
db = Chroma.from_documents(documents, embeddings)

In [6]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

# 设置 Hugging Face 管道
pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-small",
    tokenizer="google/flan-t5-small",
    max_length=3000,
    temperature=0,
    top_p=0.95,
    repetition_penalty=1.15
)

# 创建本地 LLM
local_llm = HuggingFacePipeline(pipeline=pipe)

# 创建检索器
retriever = db.as_retriever(search_kwargs={"k": 3})

# 创建 QA 链
qa_chain = RetrievalQA.from_chain_type(
    llm=local_llm, 
    chain_type="stuff", 
    retriever=retriever, 
    return_source_documents=True
)



In [7]:
import textwrap

def wrap_text_preserve_newlines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)

    return wrapped_text

def process_llm_response(llm_response):
    print(wrap_text_preserve_newlines(llm_response['result']))
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])

In [8]:
# full example
query = "What was the state of the patient's mood?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

Token indices sequence length is longer than the specified maximum sequence length for this model (16160 > 512). Running this sequence through the model will result in indexing errors


mood is "not good", affect stable, smiles appropriately a couple times. thoughts organized and goal-directed,
responds to prompts in organized fashion without straying from topic


Sources:
discharge/text_chunk_25.txt
discharge/text_chunk_24.txt
discharge/text_chunk_5.txt


In [9]:
from operator import itemgetter

from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

In [25]:
retriever = db.as_retriever(search_kwargs={"k": 3})

In [17]:
from langchain.llms import HuggingFacePipeline

# 创建本地 LLM
local_llm = HuggingFacePipeline(pipeline=pipe)

llm = HuggingFacePipeline.from_model_id(
    model_id="google/flan-t5-small",
    task="text2text-generation",
    model_kwargs={"temperature": 0, "max_length": 2000},
)




In [18]:

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

In [19]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()} 
    | prompt 
    | llm 
    | StrOutputParser()
)

In [20]:
chain.invoke("What was the state of the patient's mood?")

Token indices sequence length is longer than the specified maximum sequence length for this model (16352 > 512). Running this sequence through the model will result in indexing errors


'mood is "not good", affect stable, smiles appropriately a couple times. thoughts organized and goal-directed, responds to prompts in organized fashion without straying from topic'