# PDF Q&A System (Radiology)

In [1]:
import os
from langchain_openai import ChatOpenAI
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Chat model shared by the rest of the notebook; it is the LLM handed to the
# question-answering chain further down.
# NOTE(review): presumably picks up OpenAI credentials from the environment
# (e.g. OPENAI_API_KEY) since no key is passed here — confirm before running.
llm = ChatOpenAI(model="gpt-4o")

## Load PDF

In [2]:
from langchain_community.document_loaders import PyPDFLoader
# Location of the glossary PDF, relative to the notebook's working directory.
pdf_path = "pdf/2024_Fleischner Society_Glossary of Terms for Thoracic Imaging.pdf"

# Read the PDF into a list of Document objects (each carries `source` and
# `page` metadata, as the preview cell below shows).
loader = PyPDFLoader(pdf_path)
docs = loader.load()

# Sanity check: how many Document objects the loader produced.
print(len(docs))

55


In [8]:
# Peek at the first three loaded Document objects (metadata + raw page text).
docs[:3]

[Document(metadata={'source': 'pdf/2024_Fleischner Society_Glossary of Terms for Thoracic Imaging.pdf', 'page': 0}, page_content='REVIEWS AND COMMENTARY  • STATEMENTS AND GUIDELINES\nThis copy is for personal use only. T o order copies, contact reprints@rsna.orgThe present glossary of terms for thoracic imaging is the \nfourth prepared by members of the Fleischner Society \nand replaces the previous glossaries of terms for chest radi-\nography (1) and CT (2,3), respectively. The impetus to up-\ndate the previous version arose from an awareness that new \nterms have emerged, others have become obsolete, and the \nusage of some terms has either changed or become incon-\nsistent to a degree that warranted a new definition. The \nmethodology used for compiling this glossary is described \nin Appendix S1. Appendix S2 summarizes the terms in-\ncluded, excluded, and rearranged, compared with the pre-\nvious version of the glossary.\nThe intention of this latest glossary is not to be exhaus-\n

In [5]:
## Example content
# Print the extracted text of the first two documents, one after the other,
# to see what the raw PDF extraction looks like.
for page in docs[:2]:
    print(page.page_content)

REVIEWS AND COMMENTARY  • STATEMENTS AND GUIDELINES
This copy is for personal use only. T o order copies, contact reprints@rsna.orgThe present glossary of terms for thoracic imaging is the 
fourth prepared by members of the Fleischner Society 
and replaces the previous glossaries of terms for chest radi-
ography (1) and CT (2,3), respectively. The impetus to up-
date the previous version arose from an awareness that new 
terms have emerged, others have become obsolete, and the 
usage of some terms has either changed or become incon-
sistent to a degree that warranted a new definition. The 
methodology used for compiling this glossary is described 
in Appendix S1. Appendix S2 summarizes the terms in-
cluded, excluded, and rearranged, compared with the pre-
vious version of the glossary.
The intention of this latest glossary is not to be exhaus-
tive but to focus on terms of clinical importance and those 
whose meaning may be perceived as vague or ambigu-
ous. This focus is important to 

## Split & Index

In [9]:
# Split every loaded document into chunks of up to 1000 characters, with a
# 200-character overlap so text cut at a chunk boundary still appears whole
# in one of the neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Build the vector index only once.
# Chroma.from_documents writes into `persist_directory`; running this cell again
# against an already-populated directory would embed every chunk a second time
# (extra API cost) and store duplicate entries, which then crowd the retriever's
# results with repeated chunks. When a non-empty index directory already exists
# we therefore re-open it instead of re-indexing.
# NOTE: the check only looks at the directory, not at its contents. Delete
# "db2" to force a rebuild after changing the PDF or the chunking parameters.
persist_directory = "db2"
embeddings = OpenAIEmbeddings()

if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # Reuse the embeddings persisted by a previous run.
    vectorstore = Chroma(persist_directory=persist_directory,
                         embedding_function=embeddings)
else:
    # First run: embed all chunks and save them to disk.
    vectorstore = Chroma.from_documents(documents=splits,
                                        embedding=embeddings,
                                        persist_directory=persist_directory)

In [47]:
# Re-open the index that was persisted under "db2" so later cells work from the
# on-disk copy. The same embedding class is supplied so queries are embedded
# the same way the stored chunks were.
embedding_function = OpenAIEmbeddings()
vectorstore = Chroma(embedding_function=embedding_function, persist_directory="db2")
vectorstore

<langchain_chroma.vectorstores.Chroma at 0x16a040f10>

In [48]:
# Expose the vector store through the retriever interface expected by the
# retrieval chain. No arguments are passed, so the store's default search
# settings apply.
retriever = vectorstore.as_retriever()
# Bare expression: display the retriever's repr as the cell output.
retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x16a040f10>)

## Prompt

In [16]:
# Set the LANGCHAIN_API_KEY environment variable first (create the key in settings).
# Pulls the "rlm/rag-prompt" prompt from the LangChain hub.
# NOTE(review): `prompt` is reassigned by the ChatPromptTemplate cell below
# before the chain is built, so when the notebook is run top to bottom the
# prompt pulled here is never the one the chain uses.
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")

In [38]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# System instructions for the answering model. `{context}` is the placeholder
# that receives the retrieved document text. Adjacent literals are concatenated
# by Python into a single string.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Format your answer as headings, bullets, or numbering as appropriate. "
    "Keep the answer concise.\n"
    "Context: {context}\n"
)

# Two-message chat template: the system instructions above, followed by the
# user's question supplied under the `input` key.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

# Smoke test: fill both placeholders with dummy values and show the result.
prompt.invoke({"context": "C", "input": "Hi"})


ChatPromptValue(messages=[SystemMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Format your answer as headings, bullets, or numbering as appropriate. Keep the answer concise.\nContext: C\n"), HumanMessage(content='Hi')])

## Chain

In [39]:
# Answer-generation step: formats the supplied documents into the prompt's
# `context` variable, sends the filled prompt to `llm`, and parses the reply to
# a string (see the pipeline shown in the cell output).
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Bare expression: display the composed runnable for inspection.
question_answer_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| ChatPromptTemplate(input_variables=['context', 'input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Format your answer as headings, bullets, or numbering as appropriate. Keep the answer concise.\nContext: {context}\n")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x1204f1210>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x1204f2fe0>, model_name='gpt-4o', openai_api_key=SecretStr('**********'), openai_proxy='')
| StrOutputParser(), config={'run_

## Execute

In [49]:
# Full RAG pipeline: the retriever fetches documents for the question and the
# question-answer chain turns them into an answer.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Ask a sample glossary question. The result is a dict containing the original
# input, the retrieved context documents, and the generated answer.
question = "What is Architectural distortion?"
results = rag_chain.invoke({"input": question})
results

{'input': 'What is Architectural distortion?',
 'context': [Document(metadata={'page': 21, 'source': 'pdf/2024_Fleischner Society_Glossary of Terms for Thoracic Imaging.pdf'}, page_content='Fleischner Society: Glossary of Terms for Thoracic Imaging22 radiology.rsna.org \u2002 ■\u2002Radiology: Volume 3 10: Number 2—February 2024\nFigure 26: Aortopulmonary window: (A) Frontal chest \nradiograph and (B) coronal CT image reconstruction in the \nsame patient show aortopulmonary window (arrow).\nclick to return to page 3\nFigure 27: Apical cap: (A) Frontal chest radiograph and (B)  \ncoronal CT image reconstruction in the same patient show bilateral \napical caps (arrows).\nclick to return to page 3\nFigure 28: Architectural distortion: Transverse CT images of the (A, B) right and (C) left lung of three different patients show traction bronchiectasis (solid \narrows). Note the presence of coexisting fibrosis and architectural distortion (open arrow in B  and C ). click to return to page 3, 

In [51]:
# Show only the generated answer text; the full `results` dict also holds the
# "input" question and the retrieved "context" documents.
print(results["answer"])

### Architectural Distortion

- **Definition**: Architectural distortion refers to the abnormal appearance of lung tissue, where the normal pulmonary architecture is disrupted.
- **Associated Features**: It often occurs with other signs of pulmonary fibrosis.
- **Imaging Findings**:
  - **CT Images**: It can be observed in transverse CT images as areas with traction bronchiectasis and coexisting fibrosis.
  - **Indications**: Presence of traction bronchiectasis (solid arrows in images) and fibrosis (open arrows in images).

