## 示例选择器
示例选择器按照自身的选择逻辑（本例使用语义相似度），从示例集中挑选出与当前输入最相关的示例，用于构造少样本提示词。

In [None]:
from dashscope import TextEmbedding
from langchain.embeddings.base import Embeddings
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.vectorstores import FAISS
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
import json
import os
import numpy as np
from dashscope import Generation

# Load the configuration file. The encoding is given explicitly so that
# decoding does not depend on the platform's locale default.
with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

class DashScopeEmbeddings(Embeddings):
    """Embeddings adapter backed by DashScope's ``text-embedding-v2`` model.

    The embedding methods always return a vector: when the remote call fails
    or returns a malformed payload, the deterministic character-code vector
    produced by ``_fallback_embed`` is returned instead.
    """

    def __init__(self, api_key: str = None):
        """Store the API key and the fallback vector size.

        :param api_key: DashScope API key; when falsy, the value of the
            ``DASHSCOPE_API_KEY`` environment variable is used instead.
        """
        self.api_key = api_key or os.getenv('DASHSCOPE_API_KEY')
        # Length of the vectors built by _fallback_embed (1536, matching the
        # dimension the original author noted for text-embedding-v2).
        self.embedding_dimension = 1536

    def embed_documents(self, texts: list) -> list:
        """Return one embedding vector per text, in input order.

        :param texts: list of texts to embed
        :return: list of embedding vectors; empty when ``texts`` is empty
        """
        if not texts:
            return []

        # Texts are embedded one request at a time rather than as a batch.
        # embed_query already substitutes the fallback vector on any failure
        # (including an empty result), so no second check is needed here.
        return [self.embed_query(text) for text in texts]

    def embed_query(self, text: str) -> list:
        """Return the embedding vector for a single text.

        The vector is requested from DashScope and validated before being
        returned. Any failure (exception, non-200 status, missing, empty or
        non-numeric vector) is reported with ``print`` and answered with
        ``_fallback_embed(text)``.

        :param text: text to embed
        :return: embedding vector as a list of numbers
        """
        try:
            response = TextEmbedding.call(
                model='text-embedding-v2',
                input=text,
                api_key=self.api_key
            )

            if response.status_code != 200:
                print(f"API调用失败: {response.code} - {response.message}")
                return self._fallback_embed(text)

            # The vector sits under output['embeddings'][0]['embedding'].
            embedding_vector = response.output['embeddings'][0]['embedding']

            # Validate the payload before handing it to the vector store.
            # (Previously an early return made these checks unreachable.)
            if not isinstance(embedding_vector, list):
                print(f"API返回的embedding不是列表格式: {type(embedding_vector)}")
                return self._fallback_embed(text)

            if not embedding_vector:
                print("API返回的embedding为空")
                return self._fallback_embed(text)

            if not all(isinstance(x, (int, float)) for x in embedding_vector):
                print("API返回的embedding包含非数值元素")
                return self._fallback_embed(text)

            return embedding_vector

        except Exception as e:
            # Deliberate best-effort: any error degrades to the fallback
            # vector instead of aborting the caller.
            print(f"生成嵌入向量时出错: {str(e)}")
            return self._fallback_embed(text)

    def _fallback_embed(self, text: str) -> list:
        """Build a deterministic stand-in vector from character codes.

        The first ``embedding_dimension`` characters are mapped to
        ``ord(char) / 255.0``; remaining positions stay zero, and the result
        is scaled to unit L2 norm unless it is all zeros. The vector carries
        no semantic meaning.

        :param text: input text
        :return: list of ``embedding_dimension`` floats
        """
        print("使用后备嵌入方法")
        dimension = self.embedding_dimension
        vector = np.zeros(dimension, dtype=np.float32)

        # Only the leading `dimension` characters contribute; the tail of the
        # zero-initialised array needs no explicit padding.
        codes = [ord(char) for char in text[:dimension]]
        if codes:
            vector[:len(codes)] = np.asarray(codes, dtype=np.float64) / 255.0

        # Scale to unit length; an all-zero vector is returned unchanged.
        norm = np.linalg.norm(vector)
        if norm > 0:
            vector = vector / norm

        return vector.tolist()

def chat_with_qwen(prompt):
    """Send ``prompt`` to the Qwen model and return the generated text.

    The model name and API key are read from the module-level ``config``
    (keys ``qwen_model`` and ``api_key``).

    :param prompt: prompt text passed to ``Generation.call``
    :return: ``response.output.text`` of a successful call
    :raises Exception: when the response status code is not 200
    """
    reply = Generation.call(
        model=config['qwen_model'],
        prompt=prompt,
        api_key=config['api_key'],
    )

    # Guard clause: surface API failures to the caller right away.
    if reply.status_code != 200:
        raise Exception(f"API调用失败: {reply.code} - {reply.message}")

    return reply.output.text

# Template that renders one few-shot example as an input/output pair.
example_prompt = PromptTemplate(
    template="输入: {input}\n输出: {output}",
    input_variables=["input", "output"],
)

# Pool of few-shot examples the selector chooses from.
examples = [
    dict(input="人", output="房子"),
    dict(input="老鼠", output="农田"),
    dict(input="老虎", output="丛林"),
    dict(input="狗", output="狗窝"),
]

# Create the embedding model instance with the API key from the config file.
embeddings = DashScopeEmbeddings(api_key=config["api_key"])

# Smoke-test the embedding call first: print the vector's length, its Python
# type, and its full contents.
test_embedding = embeddings.embed_query("测试")
print(f"测试嵌入向量长度: {len(test_embedding)}")
print(f"嵌入向量类型: {type(test_embedding)}")
print(f"向量元素: {test_embedding}")

try:
    # Input used to exercise the selector.
    my_input = {"input": "猫"}

    # Example selector: embeds the examples into a FAISS store and returns
    # the single (k=1) example most similar to the input.
    example_selector = SemanticSimilarityExampleSelector.from_examples(
        k=1,
        vectorstore_cls=FAISS,
        embeddings=embeddings,
        examples=examples,
    )

    # Few-shot prompt whose examples are chosen by the selector at format time.
    similarity_search_prompt = FewShotPromptTemplate(
        input_variables=["input"],
        prefix="以下是一些示例:\n",
        suffix="输入: {input}\n输出: ",
        example_prompt=example_prompt,
        example_selector=example_selector,
    )

    print("=======")

    # Render the final prompt and show it.
    prompt = similarity_search_prompt.format(**my_input)
    print(prompt)

    # Send the rendered prompt to the model and show the reply.
    output = chat_with_qwen(prompt)
    print("=======")
    print(output)

except Exception as err:
    # Top-level boundary of the demo: report the error with its traceback
    # instead of letting the cell crash.
    import traceback
    print(f"发生错误: {err}")
    print(traceback.format_exc())

测试嵌入向量长度: 1536
嵌入向量类型: <class 'list'>
向量元素: [0.006821622278122147, 0.03540109861979729, 0.003791647894784124, -0.021653820608331088, -0.016452147847235768, -0.016333252469839302, 0.010054090351088524, -0.009682542296724573, -0.030912798123080756, -0.009058341565393134, 0.04800400862382252, -0.02193619712964769, 0.0383734830547089, -0.016184633248093723, 0.009533923074978993, 0.018532816951673897, 0.014995679474129078, -0.0010514809938499824, -0.0024707945615202766, 0.038046520766868624, -0.015545570594587726, 0.004993605850651507, -0.008909722343647555, 0.022456364405757222, -0.036322537794619886, -0.01183752101203549, 0.006223429910596186, -0.004317388391709115, -0.004503162418891091, 0.012513738470977883, 0.004302526469534557, 0.029456329749974065, 0.03296374338316977, -0.01575363750503154, -0.01994469955825691, -0.013323713229491297, 0.0064797980681073125, 0.030526388146542245, -0.007601873192286446, -0.0034293885417792714, 0.011681470829202631, 0.009043479643218577, 0.0094670444251