In [26]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain_core.output_parsers import StrOutputParser
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
import re

def ollama_query(context, question, model_name="qwen2.5:7b", strip_think=False):
    """Answer ``question`` from ``context`` using a local Ollama model.

    The prompt template is piped through ``ChatPromptTemplate`` ->
    ``OllamaLLM`` -> ``StrOutputParser`` and the parsed string is returned.

    Note the parameter order: ``context`` comes first and ``question``
    second. Prefer keyword arguments at call sites so the two strings cannot
    be swapped silently.

    Args:
        context: Retrieved text the model should base its answer on.
        question: The user's question.
        model_name: Ollama model tag to run. Defaults to "qwen2.5:7b", the
            value this function previously hard-coded.
        strip_think: When True, remove every ``<think>...</think>`` span
            (and the whitespace right after it) from the response. Defaults
            to False, which returns the raw response.

    Returns:
        The model's answer as a string.
    """
    template = """
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
    Question: {question} 
    Context: {context} 
    Answer:
    """
    prompt = ChatPromptTemplate.from_template(template)

    model = OllamaLLM(model=model_name)
    chain = prompt | model | StrOutputParser()
    response = chain.invoke({"question": question, "context": context})

    if strip_think:
        # Opt-in filter for the <think>...</think> part of the response.
        # DOTALL lets the pattern span multiple lines.
        response = re.sub(r'<think>.*?</think>\s*', '', response, flags=re.DOTALL)
    return response



In [27]:
# Smoke test of the LLM chain: the first argument is the context, the second is the question.
ollama_query(context="好用的框架", question="what is langchain")

'LangChain是一个可用的框架，用于构建和运行复杂的语言模型。它允许开发者轻松连接多种模型和数据源，创建高效的问答系统或其他语言处理任务。\n\n- LangChain是一个可用的框架，用于构建和运行复杂的语言模型。\n- 它允许开发者将多种模型和数据源集成在一起。\n- 这使得用户能够轻松创建如问答系统等高效的语言处理任务。'

In [5]:
from typing import List
from langchain.schema import Document

def load_documents(file_path:str)->List[Document]:
    """Load a PDF file and return its contents as Document objects.

    Args:
        file_path: Path of the PDF file handed to PyPDFLoader.

    Returns:
        The list returned by ``PyPDFLoader.load()``. Per the original
        author's note, each Document corresponds to one PDF page and carries
        the page content plus metadata such as the page number.
    """
    pdf_loader = PyPDFLoader(file_path)
    print("loading documents...")
    return pdf_loader.load()

In [6]:
def chunk_text(documents:List[Document])->List[Document]:
    """Split documents into smaller text chunks.

    Args:
        documents: The documents to split.

    Returns:
        The chunks produced by a RecursiveCharacterTextSplitter configured
        with chunk_size=1000 and chunk_overlap=200.
    """
    print("chunking text...")
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return splitter.split_documents(documents)

# 示例：如何使用chunk_text函数
# documents = load_documents("your_file.pdf")
# chunks = chunk_text(documents)

In [7]:
# Load the sample paper and dump the resulting list of Document objects.
documents = load_documents(file_path="lstm.pdf")
print(documents)

loading documents...
[Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-01-13T02:09:12+00:00', 'author': '', 'keywords': '', 'moddate': '2023-01-13T02:09:12+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'lstm.pdf', 'total_pages': 26, 'page': 0, 'page_label': '1'}, page_content='Sequencer: Deep LSTM for Image Classiﬁcation\nYuki Tatsunami1,2 Masato Taki1\n1Rikkyo University, Tokyo, Japan\n2AnyTech Co., Ltd., Tokyo, Japan\n{y.tatsunami, taki_m}@rikkyo.ac.jp\nAbstract\nIn recent computer vision research, the advent of the Vision Transformer (ViT)\nhas rapidly revolutionized various architectural design efforts: ViT achieved state-\nof-the-art image classiﬁcation performance using self-attention found in natural\nlanguage processing, and MLP-Mixer achieved competitive performance using\nsimple multi-layer per

In [8]:
# Load the PDF again, split it into chunks, and show the second chunk.
documents = load_documents(file_path="lstm.pdf")
chunks = chunk_text(documents=documents)
print(chunks[1])

loading documents...
chunking text...
page_content='computer vision. Here we propose Sequencer, a novel and competitive architecture
alternative to ViT that provides a new perspective on these issues. Unlike ViTs,
Sequencer models long-range dependencies using LSTMs rather than self-attention
layers. We also propose a two-dimensional version of Sequencer module, where an
LSTM is decomposed into vertical and horizontal LSTMs to enhance performance.
Despite its simplicity, several experiments demonstrate that Sequencer performs
impressively well: Sequencer2D-L, with 54M parameters, realizes 84.6% top-1
accuracy on only ImageNet-1K. Not only that, we show that it has good transfer-
ability and the robust resolution adaptability on double resolution-band. Our source
code is available at https://github.com/okojoalg/sequencer.
1 Introduction
Sequencer2D-S
Sequencer2D-M
Sequencer2D-L
0 20 40 60 80 100
79
80
81
82
83
84
ConvNeXt
Swin
ViP
CycleMLP
GFNet
Sequencer(ours)
Number of Parameters(M)
I

In [9]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

def create_vector_db(
    chunks,
    persist_directory="./rag_chroma",
    collection_name="navie_rag",
    embedding_model_name="BAAI/bge-base-en",
):
    """Create a local Chroma vector database from text chunks.

    Args:
        chunks: List of text chunks (Documents) to embed and store.
        persist_directory: Directory passed to Chroma as its
            ``persist_directory``. Defaults to "./rag_chroma".
        collection_name: Name of the Chroma collection. Defaults to
            "navie_rag" (spelling kept as-is so the default collection name
            does not change).
        embedding_model_name: Model name passed to HuggingFaceEmbeddings.
            Defaults to "BAAI/bge-base-en".

    Returns:
        The Chroma vector store built from ``chunks``.
    """
    # Build the embedding function separately so the Chroma call stays readable.
    embedding = HuggingFaceEmbeddings(model_name=embedding_model_name)

    # No explicit persist call is made here; saving to disk is left to Chroma
    # via the persist_directory argument.
    chromadb = Chroma.from_documents(
        documents=chunks,
        collection_name=collection_name,
        embedding=embedding,
        persist_directory=persist_directory,
    )

    # Report the directory that was actually used rather than a fixed name.
    print(f"Chroma向量数据库已创建并保存到{persist_directory}")

    return chromadb




In [10]:
# Embed the chunks and build the vector store used by the retrieval cells below.
db = create_vector_db(chunks=chunks)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.1k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/719 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Chroma向量数据库已创建并保存到rag_chroma


In [20]:
def retrieve_related_context(query, db, k=1):
    """Retrieve the chunks most similar to a user query.

    Args:
        query: The user's query.
        db: Vector database; must provide ``as_retriever``.
        k: Number of related documents to return.

    Returns:
        Whatever the similarity retriever's ``invoke(query)`` returns.
    """
    search_options = {"k": k}
    similarity_retriever = db.as_retriever(
        search_type="similarity",
        search_kwargs=search_options,
    )
    print("Retrieving")
    return similarity_retriever.invoke(query)

In [21]:
# Sample query; k is left at its default, so a single chunk is requested.
query = "Explain LSTM model in one line"

relevant_chunks = retrieve_related_context(query=query, db=db)

Retrieving


In [22]:
# Print each retrieved chunk under a numbered header, followed by blank lines.
for i, chunk in enumerate(relevant_chunks):
    print(f"Chunk-{i}", chunk, "\n", sep="\n")

Chunk-0
page_content='# x: input tensor of shape (B, H, W, C)
### initialization ###
self.rnn_v = nn.LSTM(C, D, num_layers=1, batch_first=True, bias=True, bidirectional=True)
self.rnn_h = nn.LSTM(C, D, num_layers=1, batch_first=True, bias=True, bidirectional=True)
self.fc = nn.Linear(4 * D, C)
### forward ###
def forward(self, x):
v, _ = self.rnn_v(x.permute(0, 2, 1, 3).reshape(-1, H, C))
v = v.reshape(B, W, H, -1).permute(0, 2, 1, 3)
h, _ = self.rnn_h(x.reshape(-1, W, C))
h = h.reshape(B, H, W, -1)
x = torch.cat([v, h], dim=-1)
x = self.fc(x)
return x
B.2 Architecture details
This subsection describes Sequencer’s architecture. The architectural details are shown in Table 4
and 5.
Sequencer2D-S is based on a ViP-S/7-like architecture. We intend to directly compare the BiLSTM2D
layer in Sequencer2D, which has a similar structure, with the Permute-MLP layer in ViP-S/7. Table 4
is a summary of the architecture. In keeping with ViP, the ﬁrst stage of Sequencers involves patch' metadata={'a

In [23]:
def build_context(relevant_chunks: List[Document]) -> str:
    """Join the text of the retrieved chunks into one context string.

    Parameters:
    relevant_chunks (List[Document]): Retrieved chunks; only their
        ``page_content`` attribute is read.

    Returns:
    str: The chunks' ``page_content`` values, in order, separated by a
        blank line. An empty list gives an empty string.
    """
    print("Context is built from relevant chunks")

    chunk_texts = []
    for document in relevant_chunks:
        chunk_texts.append(document.page_content)

    return "\n\n".join(chunk_texts)

In [19]:
# Assemble the prompt context from the retrieved chunks and show it.
context = build_context(relevant_chunks=relevant_chunks)
print(context)

Context is built from relevant chunks
# x: input tensor of shape (B, H, W, C)
### initialization ###
self.rnn_v = nn.LSTM(C, D, num_layers=1, batch_first=True, bias=True, bidirectional=True)
self.rnn_h = nn.LSTM(C, D, num_layers=1, batch_first=True, bias=True, bidirectional=True)
self.fc = nn.Linear(4 * D, C)
### forward ###
def forward(self, x):
v, _ = self.rnn_v(x.permute(0, 2, 1, 3).reshape(-1, H, C))
v = v.reshape(B, W, H, -1).permute(0, 2, 1, 3)
h, _ = self.rnn_h(x.reshape(-1, W, C))
h = h.reshape(B, H, W, -1)
x = torch.cat([v, h], dim=-1)
x = self.fc(x)
return x
B.2 Architecture details
This subsection describes Sequencer’s architecture. The architectural details are shown in Table 4
and 5.
Sequencer2D-S is based on a ViP-S/7-like architecture. We intend to directly compare the BiLSTM2D
layer in Sequencer2D, which has a similar structure, with the Permute-MLP layer in ViP-S/7. Table 4
is a summary of the architecture. In keeping with ViP, the ﬁrst stage of Sequencers involves pat

In [28]:



def rag_pipeline(query: str, k: int = 3) -> str:
    """Run the full RAG (retrieval-augmented generation) flow for one query.

    Uses the module-level vector database ``db``, so the cell that creates
    it must have been run first.

    Args:
        query (str): The user's question.
        k (int): Number of document chunks to retrieve.

    Returns:
        str: The generated answer.
    """
    # 1. Retrieve the relevant document chunks.
    relevant_chunks = retrieve_related_context(query, db, k)

    # 2. Build the context string from those chunks.
    context = build_context(relevant_chunks)

    # 3. Generate the answer. ollama_query's signature is (context, question);
    #    passing by keyword keeps the retrieved text in the prompt's Context
    #    slot and the user query in its Question slot.
    response = ollama_query(context=context, question=query)

    return response


# End-to-end check of the complete RAG system.
test_query = "What is lstm used for?"
response = rag_pipeline(query=test_query)
print(f"问题: {test_query}")
print(f"回答: {response}")


Retrieving
Context is built from relevant chunks
问题: What is lstm usef for?
回答: LSTM (Long Short-Term Memory) is used in image recognition to process sequential data efficiently along both height and width, enabling better handling of complex patterns through bidirectional processing. This approach reduces computational complexity compared to other RNNs, making it suitable for tasks requiring understanding of local and global contexts in images.
