In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Location of the InternLM-Chat-20B checkpoint (weights + tokenizer files).
model_path = "share/model_repos/internlm-chat-20b"

# trust_remote_code=True allows transformers to execute the modelling /
# tokenizer code bundled with the checkpoint instead of a built-in class.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Load the weights 4-bit quantized with fp16 compute, placed on the GPU,
# and switch straight to inference mode (eval() returns the module itself).
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    load_in_4bit=True,
    device_map="cuda",
).eval()



In [None]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from BCEmbedding import RerankerModel
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [None]:
# Sentence-embedding model used later to index and query the document chunks.
embeddingModel = HuggingFaceEmbeddings(
    model_name=r"root/model/bce-embedding-base-v1"
)

# Source document for the knowledge base; the loader returns one entry per page.
pdf_path = r"root/files/浙大城市学院财务报销办事指南.pdf"
pdf_loader = PyPDFLoader(pdf_path)
pdf_document = pdf_loader.load()

# Sanity check: character count of the first two pages.
for page_index in (0, 1):
    print(len(pdf_document[page_index].page_content))

In [None]:
# Cut the pages into overlapping 400-character chunks (200 characters shared
# between neighbours) so each chunk is small enough to embed and retrieve.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=200)
chunks = text_splitter.split_documents(pdf_document)

# Embed every chunk and store the vectors in a Chroma collection.
db = Chroma.from_documents(chunks, embeddingModel)

# Retrieve the three chunks most similar to the user question.
question = "哪些票据报销时不予受理？"
docs = db.similarity_search(question, k=3)

# Show the raw hits, then the text of the best-ranked one.
print(docs)
print(docs[0].page_content)

In [None]:

# Build the RAG prompt: the top-ranked retrieved chunk is injected as context
# and the model is told to answer "don't know" when the context is insufficient.
prompt = f"""
你是浙大城市学院的校园AI助手,
请你根据下列上下文回答问题。不知道你就回答不知道。
上下文：{docs[0].page_content}
问题：{question}
回答：
"""

# Start from an empty conversation history. The cells shown before this one
# never define `history`, so passing `history=history` raised NameError on a
# fresh kernel (Restart & Run All). Using `[]` also keeps the cell idempotent:
# re-running it no longer feeds previous turns back into this one-shot question.
# `model.chat` returns the reply together with the updated history.
response, history = model.chat(tokenizer, prompt, history=[])
print(response)