# Task 1
在 Task 1 中，使用 OpenAI 的 API 将文档转换为向量。由于本人的免费 API 调用次数有限，因此改用第三方免费 API 服务：https://github.com/chatanywhere/GPT_API_free

In [18]:
import requests
import json
from create_embeddings import extract_text_from
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
import os

# Build (once) and load a FAISS vector index over the Strikingly help-center
# articles. Later cells rely on the names `api_base`, `api_key` and `store`.

# OpenAI-compatible endpoint; can be overridden through the environment.
api_base = os.environ.get("OPENAI_API_BASE", "https://api.chatanywhere.tech/v1")

# SECURITY: the API key used to be hard-coded on this line, which leaks the
# credential to anyone who can read the notebook. It is now taken from the
# environment; the previously committed key should be revoked.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this notebook."
    )

# A single embeddings client is shared by index construction and loading.
embeddings = OpenAIEmbeddings(
    openai_api_base=api_base,
    openai_api_key=api_key,
)

# If the directory already exists the articles were fetched and embedded on a
# previous run, so skip the download and the embedding requests.
if not os.path.exists("faiss_store"):
    url = "https://support.strikingly.com/api/v2/help_center/en-us/articles.json"
    headers = {
        "User-Agent": "Apifox/1.0.0 (https://apifox.com)",
        "Accept": "*/*",
        "Host": "support.strikingly.com",
        "Connection": "keep-alive",
    }

    # Follow the paginated listing until the API stops returning a next page.
    pages = []
    while url:
        # A timeout keeps a stalled connection from hanging the notebook, and
        # raise_for_status surfaces HTTP errors instead of failing later on a
        # missing "articles" key.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        js = response.json()

        # extract_text_from was modified to parse the article HTML directly
        # instead of fetching it from a URL.
        pages.extend(
            {"text": extract_text_from(article["body"]), "source": article["url"]}
            for article in js["articles"]
        )
        url = js.get("next_page")

    # Split every article into chunks and record the originating URL per chunk.
    text_splitter = CharacterTextSplitter(chunk_size=1500, separator="\n")
    docs, metadatas = [], []
    for page in pages:
        splits = text_splitter.split_text(page["text"])
        docs.extend(splits)
        # One dict per chunk, so the chunks do not share a single mutable
        # metadata object.
        metadatas.extend({"source": page["source"]} for _ in splits)

    store = FAISS.from_texts(docs, embeddings, metadatas=metadatas)
    # The pickle-based save from the original code did not work, so FAISS's own
    # save_local method is used instead.
    store.save_local("faiss_store")

# Load the persisted vector database.
# NOTE: allow_dangerous_deserialization=True unpickles the stored docstore;
# only load a "faiss_store" directory that this notebook created itself.
store = FAISS.load_local(
    "faiss_store",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

测试案例

In [22]:
import argparse
import os

from langchain.chat_models import ChatOpenAI
from langchain.chains import VectorDBQAWithSourcesChain
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

# Task 1 test case: ask one question against the saved FAISS index and print
# the answer together with the sources reported by the chain.

# Embedding client using the endpoint and key defined in the earlier cell.
embeddings = OpenAIEmbeddings(
    openai_api_base=api_base,
    openai_api_key=api_key,
)

# The question goes through argparse, but the argument list is passed
# explicitly rather than being read from the command line.
parser = argparse.ArgumentParser(description='Paepper.com Q&A')
parser.add_argument(
    'question',
    type=str,
    help='Your question for Paepper.com',
)
args = parser.parse_args(["I want to cancel my plan, how do I do that?"])

store = FAISS.load_local(
    "faiss_store",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Also expose the credentials through the environment.
os.environ.update(
    {
        "OPENAI_API_KEY": api_key,
        "OPENAI_API_BASE": api_base,
    }
)

# Using the OpenAI wrapper raised an error here, so ChatOpenAI is used instead.
# Source: https://github.com/langchain-ai/langchain/issues/1643
llm = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=0,
    verbose=True,
    openai_api_base=api_base,
    openai_api_key=api_key,
)
chain = VectorDBQAWithSourcesChain.from_llm(
    llm=llm,
    vectorstore=store,
    verbose=True,
)
result = chain({"question": args.question})

print(f"Answer: {result['answer']}")
print(f"Sources: {result['sources']}")






[1m> Entering new VectorDBQAWithSourcesChain chain...[0m

[1m> Finished chain.[0m
Answer: To cancel your plan, you can follow the steps provided on the My Account page. If you need further assistance, you can contact support at support@strikingly.com.

Sources: 


结果

# Task 2

In [20]:
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory

# Task 2: answer a question with a conversational retrieval chain that also
# returns the documents it retrieved.

# NOTE(review): this buffer is created but never passed to the chain below.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

llm = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=0,
    verbose=True,
    openai_api_base=api_base,
    openai_api_key=api_key,
)

# Setting return_source_documents=True is all that is needed to get the
# source documents back in the result.
retriever = store.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever,
    return_source_documents=True,
)

query = "I want to cancel my plan, how do I do that?"
# No earlier conversation is needed, so chat_history is simply an empty list.
result = qa(
    {
        "question": query,
        "chat_history": [],
    }
)

In [21]:
# Print the generated answer first, then the source URL stored in the metadata
# of the first returned source document.
answer_text = result["answer"]
print(answer_text)

first_document = result["source_documents"][0]
source_url = first_document.metadata["source"]
print("Source:\n", source_url)

To cancel your plan, follow these steps:

1. Go to your "My Account" page.
2. Click on "Cancel Plan."
3. Confirm the cancellation by clicking the "Confirm Cancel Plan" button.
4. Follow the cancellation prompts on the subsequent pages.

Please note that if there’s no cancellation before the next billing date, the system will automatically charge your account for the plan renewal. If you need more assistance or have any issues, you can email us at support@strikingly.com or chat with us for further help.
Source:
 https://strikingly.zendesk.com/api/v2/help_center/en-us/articles/215047057.json
