In [1]:
import os
import openai
from dotenv import load_dotenv
load_dotenv("openai.env")

# Configure the official OpenAI SDK (module-level, pre-1.0 style interface)
# from the variables that load_dotenv() read out of openai.env.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Only override the endpoint when one is actually configured: assigning None
# here would replace the SDK's built-in default base URL.
_api_base = os.environ.get("OPENAI_API_BASE")
if _api_base:
    openai.api_base = _api_base

# Single-turn conversation: one user message asking the model to introduce itself.
messages = [{
    "role": "user",
    "content": "介绍下你自己"
}]

# Non-streaming request: the whole completion is returned in one response.
res = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=messages,
    stream=False
)
# Show the text of the first (and only requested) choice.
print(res['choices'][0]['message']['content'])

你好，我是一个语言模型AI助手，被设计用来处理和回答各种问题。我可以提供信息、帮助解决问题，或者与你进行对话。我拥有大量的知识和信息，并且会不断学习和进化以更好地为你服务。如果你有任何疑问或需求，都请随时告诉我，我会尽力帮助你。


### 长上下文精度问题

In [2]:
! pip install sentence-transformers

Collecting sentence-transformers
  Using cached sentence_transformers-3.3.1-py3-none-any.whl.metadata (10 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Using cached transformers-4.46.3-py3-none-any.whl.metadata (44 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Using cached torch-2.5.1-cp312-none-macosx_11_0_arm64.whl.metadata (28 kB)
Collecting scikit-learn (from sentence-transformers)
  Using cached scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl.metadata (13 kB)
Collecting scipy (from sentence-transformers)
  Using cached scipy-1.14.1-cp312-cp312-macosx_12_0_arm64.whl.metadata (60 kB)
Collecting huggingface-hub>=0.20.0 (from sentence-transformers)
  Using cached huggingface_hub-0.26.2-py3-none-any.whl.metadata (13 kB)
Collecting Pillow (from sentence-transformers)
  Using cached pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (9.1 kB)
Collecting filelock (from huggingface-hub>=0.20.0->sentence-transformers)
  Using cached filelo

In [11]:
from langchain.chains import LLMChain, StuffDocumentsChain
from langchain.document_transformers import (LongContextReorder)
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma
# Embed the texts with an open-source sentence-embedding model hosted on
# Hugging Face; all-MiniLM-L6-v2 is a small embedding model (not an LLM).
# The model is fetched from the Hugging Face Hub unless it is already cached,
# so the first run needs network access.
# NOTE(review): HuggingFaceBgeEmbeddings is intended for BGE models; confirm
# whether the plain HuggingFaceEmbeddings wrapper is a better fit for MiniLM.
embedings = HuggingFaceBgeEmbeddings(model_name="all-MiniLM-L6-v2")

# Small demo corpus: a mix of Celtics/basketball sentences and unrelated ones.
text = [
    "篮球是一项伟大的运动。",
    "带我飞往月球是我最喜欢的歌曲之一。",
    "凯尔特人队是我最喜欢的球队。",
    "这是一篇关于波士顿凯尔特人的文件。",
    "我非常喜欢去看电影。",
    "波士顿凯尔特人队以20分的优势赢得了比赛。",
    "这只是一段随机的文字。",
    "《艾尔登之环》是过去15年最好的游戏之一。",
    "L.科内特是凯尔特人队最好的球员之一。",
    "拉里.伯德是一位标志性的NBA球员。"
]

# Index the texts in Chroma and expose the store as a retriever.
# search_kwargs must be a dict; k=10 asks for all ten texts back, ranked by
# relevance, so the reordering step has the full list to work with.
retrieval = Chroma.from_texts(text, embedings).as_retriever(
    search_kwargs={"k": 10}
)
query = "关于我的喜好都知道什么?"

# Retrieve the documents ordered by relevance score to the query.
docs = retrieval.get_relevant_documents(query)
print(docs)

No sentence-transformers model found with name sentence-transformers/all-MiniLM-L6-v2. Creating a new one with mean pooling.


OSError: We couldn't connect to 'https://huggingface.co' to load this file, couldn't find it in the cached files and it looks like sentence-transformers/all-MiniLM-L6-v2 is not the path to a directory containing a file named config.json.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [12]:
# Reorder the retrieved documents following the scheme from the paper:
# - chunks less relevant to the question are placed in the middle
# - chunks more relevant to the question are placed at the head and tail
# Requires `docs` from the retrieval cell; if that cell failed, this raises NameError.

reordering = LongContextReorder()
reo_docs = reordering.transform_documents(docs)

# The head and tail together hold the 4 most relevant chunks
reo_docs

NameError: name 'docs' is not defined

In [None]:
import os
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from dotenv import load_dotenv
# Load the OpenAI credentials / endpoint from the local env file.
load_dotenv("openai.env")
api_key = os.environ.get("OPENAI_API_KEY")
api_base = os.environ.get("OPENAI_API_BASE")

# Set up the LLM. The LangChain OpenAI wrapper names these fields
# openai_api_key / openai_api_base, so use those names to make sure the
# values configure the client.
llm = OpenAI(
    openai_api_key=api_key,
    openai_api_base=api_base,
    model="gpt-3.5-turbo-instruct",
    temperature=0,
)

# Each document is rendered as just its raw text.
document_prompt = PromptTemplate(
    input_variables=["page_content"],
    template="{page_content}"
)

# Final prompt: the stuffed documents go into {context}, the user's question
# into {question}.
stuff_prompt_override = """
请根据提供的上下文，回答问题。
上下文：
{context}
问题：
{question}
"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=stuff_prompt_override
)

llm_chain = LLMChain(
    llm=llm,
    prompt=prompt,
)

# Chain that formats every document with document_prompt, concatenates them
# into the "context" variable, and calls llm_chain once.
WorkChain = StuffDocumentsChain(
    llm_chain=llm_chain,
    document_prompt=document_prompt,
    document_variable_name="context"
)

# The chain's inputs are `input_documents` (the reordered docs) plus every
# remaining prompt variable, which here is `question` (not `query`).
WorkChain.run(
    input_documents=reo_docs,
    question="我最喜欢做什么事情？"
)