In [1]:
!pip install faiss-gpu \
llama-index \
llama-index-vector-stores-faiss \
llama-index-embeddings-langchain \
langchain \
transformers==4.27.1 \
sentence-transformers

[0m

# HuggingFaceEmbeddings，你的开源伙伴

In [2]:
import faiss
from llama_index.core import SimpleDirectoryReader, ServiceContext, VectorStoreIndex, StorageContext
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from llama_index.core.node_parser import SentenceSplitter
from llama_index.vector_stores.faiss import FaissVectorStore

# --- Load the FAQ documents and cut them into nodes -------------------------
# The LangChain splitter breaks text on blank lines ("\n\n"); its split_text
# function is handed to SentenceSplitter as the chunking tokenizer.
# The recorded output shows a chunk can still exceed chunk_size
# ("Created a chunk of size 130, which is longer than the specified 100").
text_splitter = CharacterTextSplitter(separator="\n\n", chunk_size=100, chunk_overlap=20)
parser = SentenceSplitter(chunking_tokenizer_fn=text_splitter.split_text)
documents = SimpleDirectoryReader('./data/faq/').load_data()
nodes = parser.get_nodes_from_documents(documents)

# --- Embedding model ---------------------------------------------------------
embed_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
)
# The recorded output flags this line with a warning (text truncated in the
# output) — presumably a deprecation notice for ServiceContext; TODO confirm.
service_context = ServiceContext.from_defaults(embed_model=embed_model)

# --- FAISS index -------------------------------------------------------------
# Ask the model for its vector size instead of hard-coding 768, so the index
# cannot silently mismatch if model_name above is ever changed.
dimension = len(embed_model.embed_query("dimension probe"))

# NOTE(review): IndexFlatIP scores by inner product; confirm whether the
# embeddings are normalized if cosine similarity is what is intended.
faiss_index = faiss.IndexFlatIP(dimension)
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context, service_context=service_context)


Created a chunk of size 130, which is longer than the specified 100
  service_context = ServiceContext.from_defaults(embed_model=embed_model)


In [3]:
from llama_index.core.vector_stores.types import VectorStoreQueryMode

# Query engine on top of the FAISS-backed index, using the DEFAULT
# vector-store query mode.
query_engine = index.as_query_engine(
    verbose=True,
    vector_store_query_mode=VectorStoreQueryMode.DEFAULT,
)

# Ask each FAQ question in turn and print the answer the engine returns.
for faq_question in (
    "请问你们海南能发货吗？",
    "你们用哪些快递公司送货？",
    "你们的退货政策是怎么样的？",
):
    response = query_engine.query(faq_question)
    print(response)


我们支持全国大部分省份的配送，包括海南。
We work with well-known courier companies such as Shunfeng Express, YTO Express, STO Express, Yunda Express, ZTO Express, and Best Express.
自收到商品之日起7天内，如产品未使用、包装完好，您可以申请退货。某些特殊商品可能不支持退货，请在购买前查看商品详情页面的退货政策。


# 使用ChatGLM提供对话

In [4]:
!pip install icetk
!pip install cpm_kernels

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[0mhuggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[0m

In [5]:
from transformers import AutoTokenizer, AutoModel

# Tokenizer and weights are loaded from the same Hub repository.
CHATGLM_REPO = "THUDM/chatglm2-6b-int4"

# trust_remote_code=True allows the custom model code shipped with the repo to run.
# The recorded output recommends also passing an explicit `revision` when
# loading custom code — TODO choose and pin one.
tokenizer = AutoTokenizer.from_pretrained(CHATGLM_REPO, trust_remote_code=True)

# Load, convert to half precision, move to the GPU and switch to eval mode.
model = (
    AutoModel.from_pretrained(CHATGLM_REPO, trust_remote_code=True)
    .half()
    .cuda()
    .eval()
)

Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.


In [6]:
# Prompt with context: the return-policy passage comes first, then the
# instruction line, then the customer's question.
question = (
    "\n"
    "自收到商品之日起7天内，如产品未使用、包装完好，您可以申请退货。某些特殊商品可能不支持退货，请在购买前查看商品详情页面的退货政策。\n"
    "\n"
    "根据以上信息，请回答下面的问题：\n"
    "\n"
    "Q: 你们的退货政策是怎么样的？\n"
)

# An empty history is passed, so this is a single-turn chat.
response, history = model.chat(tokenizer, question, history=[])
print(response)

我们的退货政策如下:

自收到商品之日起7天内,如产品未使用、包装完好,您可以申请退货。某些特殊商品可能不支持退货,请在购买前查看商品详情页面的退货政策。

请注意,我们在不同网站上销售的商品退货政策可能会有所不同。因此,在购买我们的商品之前,请务必仔细阅读我们网站上商品详情页的退货政策。如果有任何疑问,请随时与我们联系。


In [7]:
# Same question as the previous cell, but with no context passage in the
# prompt: only the "Q:" line and an open "A:" for the model to complete.
question = (
    "\n"
    "Q: 你们的退货政策是怎么样的？\n"
    "A:\n"
)

# An empty history is passed, so this is a single-turn chat.
response, history = model.chat(tokenizer, question, history=[])
print(response)

我们的退货政策是在收到退回产品后的7天内，通过在线客服或电话联系我们的客服，并提供您的订单号码和退货原因。我们的客服将尽快为您处理退货事务，并在确认收到退货后，向您提供退款金额。


In [8]:
# Prompt with context: the passage lists the provinces that can be delivered
# to (海南 is in the list); the question asks about 三亚, a city the passage
# does not name directly.
question = (
    "\n"
    "我们支持全国大部分省份的配送，包括北京、上海、天津、重庆、河北、山西、辽宁、吉林、黑龙江、江苏、浙江、安徽、福建、江西、山东、河南、湖北、湖南、广东、海南、四川、贵州、云南、陕西、甘肃、青海、台湾、内蒙古、广西、西藏、宁夏和新疆.\n"
    "\n"
    "根据以上信息，请回答下面的问题：\n"
    "\n"
    "Q: 你们能配送到三亚吗？\n"
)

# An empty history is passed, so this is a single-turn chat.
response, history = model.chat(tokenizer, question, history=[])
print(response)

是的，我们支持全国大部分省份的配送，包括三亚。


In [9]:
# Prompt with context: the province list here omits 辽宁、吉林、黑龙江 and the
# passage ends by stating that 东三省 cannot be delivered to; the question
# asks about 哈尔滨.
question = (
    "\n"
    "我们支持全国大部分省份的配送，包括北京、上海、天津、重庆、河北、山西、江苏、浙江、安徽、福建、江西、山东、河南、湖北、湖南、广东、海南、四川、贵州、云南、陕西、甘肃、青海、台湾、内蒙古、广西、西藏、宁夏和新疆.但是不能配送到东三省\n"
    "\n"
    "根据以上信息，请回答下面的问题：\n"
    "\n"
    "Q: 你们能配送到哈尔滨吗？\n"
)

# An empty history is passed, so this is a single-turn chat.
response, history = model.chat(tokenizer, question, history=[])
print(response)

由于没有提到哈尔滨,我们无法确定是否能将其加入到支持全国大部分省份的配送范围中。建议您查看公司网站或者联系客服以获取最新信息。


# 将ChatGLM封装成LLM

In [10]:
!pip install llama-index-llms-langchain

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[0m

In [11]:
import faiss
from llama_index.core import SimpleDirectoryReader, ServiceContext, StorageContext,VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.vector_stores.faiss import FaissVectorStore

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms.base import LLM
from typing import Optional, List, Mapping, Any

class CustomLLM(LLM):
    """LangChain ``LLM`` adapter that answers prompts with the notebook's ChatGLM model.

    The class holds no state of its own: it reads the module-level ``model``
    and ``tokenizer`` objects, which must be loaded before the first call.
    """

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Run one single-turn chat completion and return the reply text.

        Args:
            prompt: Full prompt text to send to the model.
            stop: Optional stop sequences. The reply is cut just before the
                earliest occurrence of any of them (previously this argument
                was accepted but silently ignored).
            run_manager: Accepted for compatibility with callers that pass a
                callback manager; not used here.
            **kwargs: Accepted for compatibility with callers that forward
                extra generation options; not used here.

        Returns:
            The model's reply, truncated at the first stop sequence if any
            stop sequence occurs in it.
        """
        # An empty history is passed on every call, so no conversation state
        # carries over between prompts.
        response, _history = model.chat(tokenizer, prompt, history=[])

        if stop:
            # Skip empty stop strings: "" is found at index 0 and would
            # truncate every reply to nothing.
            cut_points = [
                position
                for position in (response.find(marker) for marker in stop if marker)
                if position != -1
            ]
            if cut_points:
                response = response[:min(cut_points)]
        return response

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Return the parameters that identify this LLM (the model name only)."""
        return {"name_of_model": "chatglm2-6b-int4"}

    @property
    def _llm_type(self) -> str:
        """Return the type tag for this LLM wrapper."""
        return "custom"

In [12]:
# ChatGLM, wrapped as a LangChain LLM, is the model used to write answers.
llm = CustomLLM()

# --- Load the FAQ documents and cut them into nodes -------------------------
# The LangChain splitter breaks text on blank lines ("\n\n"); its split_text
# function is handed to SentenceSplitter as the chunking tokenizer.
text_splitter = CharacterTextSplitter(separator="\n\n", chunk_size=100, chunk_overlap=20)
parser = SentenceSplitter(chunking_tokenizer_fn=text_splitter.split_text)
documents = SimpleDirectoryReader('./data/faq/').load_data()
nodes = parser.get_nodes_from_documents(documents)

# --- Embedding model + LLM bundled into the service context ------------------
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
# The recorded output flags this line with a warning (text truncated in the
# output) — presumably a deprecation notice for ServiceContext; TODO confirm.
service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)

# --- FAISS index -------------------------------------------------------------
# Ask the model for its vector size instead of hard-coding 768, so the index
# cannot silently mismatch if model_name above is ever changed.
dimension = len(embed_model.embed_query("dimension probe"))
faiss_index = faiss.IndexFlatIP(dimension)
# Keyword form, matching how the other cells construct FaissVectorStore.
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context, service_context=service_context)



Created a chunk of size 130, which is longer than the specified 100
  service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)


In [13]:
from llama_index.core import PromptTemplate

# QA template: retrieved context first, then the instruction line, then the
# user's question on a "Q:" line. {context_str} and {query_str} are the
# placeholders of the template.
QA_PROMPT_TMPL = "{context_str}\n\n根据以上信息，请回答下面的问题：\nQ: {query_str}\n"
QA_PROMPT = PromptTemplate(QA_PROMPT_TMPL)

# Build a query engine that uses the custom template, then ask one question.
qa_engine = index.as_query_engine(text_qa_template=QA_PROMPT)
response = qa_engine.query("请问你们海南能发货吗？")
print(response)


  warn_deprecated(


A: 是的，我们能够向海南地区提供商品和服务。如果您有关于海南发货的问题，欢迎随时与我们联系。


# 开源模型的不足之处

In [1]:
import faiss
from llama_index.core import SimpleDirectoryReader, ServiceContext, VectorStoreIndex, StorageContext
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from llama_index.core.node_parser import SentenceSplitter
from llama_index.vector_stores.faiss import FaissVectorStore

# --- Load the 《朝花夕拾》 documents and cut them into nodes --------------------
# The LangChain splitter breaks text on blank lines ("\n\n"); its split_text
# function is handed to SentenceSplitter as the chunking tokenizer.
# The recorded output shows many chunks exceeding chunk_size
# (e.g. "Created a chunk of size 429, which is longer than the specified 128").
text_splitter = CharacterTextSplitter(separator="\n\n", chunk_size=128, chunk_overlap=32)
parser = SentenceSplitter(chunking_tokenizer_fn=text_splitter.split_text)
documents = SimpleDirectoryReader('./data/zhaohuaxishi/').load_data()
nodes = parser.get_nodes_from_documents(documents)

# --- Embedding model ---------------------------------------------------------
embed_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
)
service_context = ServiceContext.from_defaults(embed_model=embed_model)

# --- FAISS index -------------------------------------------------------------
# Ask the model for its vector size instead of hard-coding 768, so the index
# cannot silently mismatch if model_name above is ever changed.
dimension = len(embed_model.embed_query("dimension probe"))

# NOTE(review): IndexFlatIP scores by inner product; confirm whether the
# embeddings are normalized if cosine similarity is what is intended.
faiss_index = faiss.IndexFlatIP(dimension)
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context, service_context=service_context)


Created a chunk of size 266, which is longer than the specified 128
Created a chunk of size 235, which is longer than the specified 128
Created a chunk of size 264, which is longer than the specified 128
Created a chunk of size 250, which is longer than the specified 128
Created a chunk of size 203, which is longer than the specified 128
Created a chunk of size 222, which is longer than the specified 128
Created a chunk of size 429, which is longer than the specified 128
Created a chunk of size 205, which is longer than the specified 128
Created a chunk of size 176, which is longer than the specified 128
Created a chunk of size 242, which is longer than the specified 128
Created a chunk of size 202, which is longer than the specified 128
Created a chunk of size 165, which is longer than the specified 128
Created a chunk of size 347, which is longer than the specified 128
Created a chunk of size 133, which is longer than the specified 128
Created a chunk of size 191, which is longer tha

In [2]:
from llama_index.core import PromptTemplate

# QA template pieces, in order; they are concatenated with no extra separator.
# The template states where the text comes from, fences the retrieved context
# between dashed rules, asks the question, and tells the model to answer
# "不知道" when it does not know.
QA_PROMPT_TMPL = "".join([
    "下面的内容来自鲁迅先生的散文集《朝花夕拾》，很多内容是以第一人称写的 \n",
    "---------------------\n",
    "{context_str}",
    "\n---------------------\n",
    "根据这些信息，请回答问题: {query_str}\n",
    "如果您不知道的话，请回答不知道\n",
])
QA_PROMPT = PromptTemplate(QA_PROMPT_TMPL)

# This engine is reused by the following cells.
query_engine = index.as_query_engine(text_qa_template=QA_PROMPT)

response = query_engine.query(
    "鲁迅先生在日本学习医学的老师是谁？"
)
print(response)


鲁迅先生在日本学习医学的老师是藤野严九郎。


In [3]:
# Follow-up question through the same query engine (and the same QA template).
response = query_engine.query(
    "鲁迅先生是在日本的哪个城市学习医学的？"
)
print(response)

鲁迅先生是在日本的东京学习医学的。


In [5]:

# Another question through the same query engine; the recorded answer is
# "不知道", the fallback that the QA template asks for.
response = query_engine.query("三味书屋是在哪里？")
print(response)

不知道


In [6]:
!pip list

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Package                                 Version
--------------------------------------- --------------
aiofiles                                22.1.0
aiohttp                                 3.9.5
aiosignal                               1.3.1
aiosqlite                               0.20.0
altair                                  5.3.0
annotated-types                         0.6.0
anyio                                   4.3.0
argon2-cffi                             23.1.0
argon2-cffi-bindings                    21.2.0
arrow                                   1.3.0
astroid                                 3.1.0
asttokens                               2.4.1
async-timeout                           4.0.3
attrs       