In [3]:
from dotenv import load_dotenv
import os
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
import base64
import re
from IPython.display import display, Markdown,Image
# Load variables from a .env file into the process environment.
load_dotenv()

# Read the configuration values from the environment. os.getenv returns
# None for any variable that is not set.
# LangChain tracing settings. NOTE(review): these four locals are not
# referenced anywhere else in the visible code — presumably the tracing
# integration reads the environment directly; confirm before removing.
langchain_tracing = os.getenv("LANGCHAIN_TRACING_V2")
langchain_endpoint = os.getenv("LANGCHAIN_ENDPOINT")
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
langchain_project = os.getenv("LANGCHAIN_PROJECT")

# Credentials / endpoints for the chat-model gateways.
api_key=os.getenv("API_KEY")
base_url=os.getenv("BASE_URL")
deepseek_api_key=os.getenv("DEEPSEEK_API_KEY")
deepseek_base_url=os.getenv("DEEPSEEK_BASE_URL")

In [49]:
def inputPrompt(question):
    """Build the ``[system, human]`` message pair for a first-pass answer.

    The human message is a list of content parts, appended in this order:
    shared context (if non-empty), every shared image, the question text,
    the question's own image (if non-empty), the options, and finally the
    answer-format instruction. Images are read from the dataset directory
    and inlined as base64 ``data:`` URLs.

    Args:
        question: Mapping with the keys "Share Context", "Share Image",
            "Question Text", "Image" and "Options".

    Returns:
        A two-element list: the system message followed by the human message.
    """
    dataset_dir = "/Volumes/Jennie/Reasoning/FinMath/dataset/"

    def text_part(text):
        # One text content part.
        return {"type": "text", "text": text}

    def image_part(relative_path):
        # One image content part: the file's bytes inlined as a data URL.
        with open(dataset_dir + relative_path, "rb") as handle:
            encoded = base64.b64encode(handle.read()).decode("utf-8")
        return {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}}

    system_message = SystemMessage(
        content="You are a financial expert. You will be given questions and options, possibly with context information and images. Please answer the question."
    )

    human_message = HumanMessage(content=[])
    add = human_message.content.append

    if len(question["Share Context"]) > 0:
        add(text_part("Context: " + question["Share Context"]))

    if len(question["Share Image"]) > 0:
        for shared_path in question["Share Image"]:
            add(image_part(shared_path))

    add(text_part("Question: " + question["Question Text"]))

    if len(question["Image"]) > 0:
        add(image_part(question["Image"]))

    add(text_part("Options: " + str(question["Options"])))
    add(text_part("Let's think step by step. The output reasoning steps are in Markdown format. Finally, must put the correct option (A, B, C, or D) in【 】. e.g.Therefore, the correct option is 【B】."))

    return [system_message, human_message]



##Utils
def FeedbackPrompt(question):
    """Build the ``[system, human]`` message pair that asks for feedback.

    The human message repeats the problem (shared context, shared images,
    question text, question image, options), then supplies the model's
    reasoning and answer labelled as wrong, the reference explanation and
    answer, and finally the instruction describing the feedback format.

    Args:
        question: Mapping with the problem keys ("Share Context",
            "Share Image", "Question Text", "Image", "Options") plus
            "Model Reasoning", "Model Answer", "Explanation" and "Answer".

    Returns:
        A two-element list: the system message followed by the human message.
    """
    dataset_dir = "/Volumes/Jennie/Reasoning/FinMath/dataset/"

    def text_part(text):
        # One text content part.
        return {"type": "text", "text": text}

    def image_part(relative_path):
        # One image content part: the file's bytes inlined as a data URL.
        with open(dataset_dir + relative_path, "rb") as handle:
            encoded = base64.b64encode(handle.read()).decode("utf-8")
        return {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}}

    system_message = SystemMessage(
        content="You are a financial expert. You will be given questions and options, possibly with context information and images. Also, you will be given wrong reasoning steps and correct reasoning hints.You are supposed to give feedback."
    )

    human_message = HumanMessage(content=[])
    add = human_message.content.append

    # --- the problem itself ---
    if len(question["Share Context"]) > 0:
        add(text_part("Context: " + question["Share Context"]))

    if len(question["Share Image"]) > 0:
        for shared_path in question["Share Image"]:
            add(image_part(shared_path))

    add(text_part("Question: " + question["Question Text"]))

    if len(question["Image"]) > 0:
        add(image_part(question["Image"]))

    add(text_part("Options: " + str(question["Options"])))

    # --- the model's attempt and the reference solution ---
    add(text_part("Wrong Reasoning Steps: " + question["Model Reasoning"]))
    add(text_part("Wrong Answer: " + question["Model Answer"]))
    add(text_part("Correct Reasoning Steps: " + question["Explanation"]))
    add(text_part("Correct Answer: " + question["Answer"]))

    add(text_part(" Please give the feedback in Markdown format. 1. Please output correct reasoning steps according to hints. 2. compare the correct reasoning step with the model's wrong reasoning step, and point out the difference. 3. summarize the hint for future simalar questions."))

    return [system_message, human_message]


def ICLPrompt(question, example):
    """Build the ``[system, human]`` pair for an in-context-learning retry.

    The human message first presents ``example`` (a previously logged
    problem together with its wrong reasoning and the feedback written for
    it), then the current ``question``, then the answer-format instruction.

    Args:
        question: Mapping with the keys "Share Context", "Share Image",
            "Question Text", "Image" and "Options".
        example: Mapping with the same problem keys plus "Model Reasoning"
            and "Feedback".

    Returns:
        A two-element list: the system message followed by the human message.
    """
    dataset_dir = "/Volumes/Jennie/Reasoning/FinMath/dataset/"

    def text_part(text):
        # One text content part.
        return {"type": "text", "text": text}

    def image_part(relative_path):
        # One image content part: the file's bytes inlined as a data URL.
        with open(dataset_dir + relative_path, "rb") as handle:
            encoded = base64.b64encode(handle.read()).decode("utf-8")
        return {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}}

    system_message = SystemMessage(
        content="You are a financial expert. You will be given previous learning document including questions and options, possibly with context information and images. Please answer the current question."
    )

    human_message = HumanMessage(content=[])
    add = human_message.content.append

    def add_problem(record):
        # Context, shared images, question text, question image, options —
        # the same layout is used for the example and the current question.
        if len(record["Share Context"]) > 0:
            add(text_part("Context: " + record["Share Context"]))
        if len(record["Share Image"]) > 0:
            for shared_path in record["Share Image"]:
                add(image_part(shared_path))
        add(text_part("Question: " + record["Question Text"]))
        if len(record["Image"]) > 0:
            add(image_part(record["Image"]))
        add(text_part("Options: " + str(record["Options"])))

    # --- previously logged mistake and its feedback ---
    add(text_part("Previous Learning Document: "))
    add_problem(example)
    add(text_part("Wrong Reasoning Steps: " + example["Model Reasoning"]))
    add(text_part("Feedback: " + example["Feedback"]))

    # --- the question to answer now ---
    add(text_part("Current Question is as follows: "))
    add_problem(question)

    add(text_part("Let's think step by step. The output reasoning steps are in Markdown format. Finally, must put the correct option (A, B, C, or D) in【 】. e.g.Therefore, the correct option is 【B】."))

    return [system_message, human_message]


In [60]:
from typing import List
from langchain_core.output_parsers import BaseGenerationOutputParser
from langchain_core.outputs import Generation
from IPython.display import display, Markdown

class MarkdownParser(BaseGenerationOutputParser[str]):
    r"""Output parser that rewrites LaTeX math delimiters for Markdown display.

    Every occurrence of ``\[``, ``\]``, ``\(`` and ``\)`` in the generated
    text is replaced by a single ``$``.
    """

    def parse_result(self, result: List[Generation], *, partial: bool = False) -> str:
        """Return the single generation's text with math delimiters replaced.

        Args:
            result: A list of Generations; must contain exactly one item.
            partial: Whether to allow partial results (for streaming, not used here).

        Returns:
            The generation text with LaTeX-style delimiters replaced by ``$``.

        Raises:
            ValueError: If ``result`` does not hold exactly one generation,
                or if that generation's ``text`` is not a string.
        """
        if len(result) != 1:
            raise ValueError("This parser only supports a single generation.")

        text = result[0].text
        if not isinstance(text, str):
            raise ValueError("Expected text output for Markdown formatting.")

        # Display (\[ \]) and inline (\( \)) delimiters are all mapped to "$".
        for delimiter in ("\\[", "\\]", "\\(", "\\)"):
            text = text.replace(delimiter, "$")
        return text
    

import re
from langchain.tools import Tool

def extract_answer(text: str) -> str:
    """Extract the answer option (A, B, C, or D) written inside 【 】.

    Whitespace between the brackets and the letter is tolerated (the prompt
    itself shows the brackets as ``【 】``), and when several bracketed
    options occur the last one is returned, because the prompt asks for the
    answer to be stated at the end of the reasoning.

    Args:
        text: The model's full response.

    Returns:
        The option letter, or the string "Answer not found" when the text
        contains no bracketed option.
    """
    options = re.findall(r"【\s*([A-D])\s*】", text)
    if options:
        return options[-1]
    return "Answer not found"

# Wrap extract_answer in a LangChain Tool to make it invokable.
# NOTE(review): the visible code calls extract_answer directly and never
# references extract_answer_tool — confirm whether this wrapper is still needed.
extract_answer_tool = Tool.from_function(
    func=extract_answer,
    name="Extract Answer Tool",
    description="Extracts the answer option in brackets (e.g., 【C】) from the provided text."
)

import json
import os

def write_output(data, file_path):
    """Append ``data`` as one record to the JSON array stored at ``file_path``.

    The file is created when it does not exist. An existing file that is
    empty (or whitespace only) is treated as holding no records. If the
    existing file holds a single non-list JSON value, that value is kept as
    the first record instead of being discarded.

    Args:
        data: JSON-serialisable record to append.
        file_path: Path of the JSON file to update.

    Raises:
        json.JSONDecodeError: If the existing file holds invalid JSON; the
            file is left untouched in that case.
        TypeError: If the records cannot be serialised; the file is left
            untouched in that case.
    """
    records = []
    if os.path.exists(file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            raw = f.read()
        if raw.strip():
            existing = json.loads(raw)
            # Keep a lone non-list value rather than overwriting it.
            records = existing if isinstance(existing, list) else [existing]

    records.append(data)

    # Serialise before opening for writing: a serialisation error must not
    # leave behind a truncated file.
    serialized = json.dumps(records, ensure_ascii=False, indent=4)
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(serialized)


In [61]:
import json
from pathlib import Path
from pprint import pprint

# Load the test set and append its first record to the RAG output file.
file_path = '/Volumes/Jennie/Reasoning/FinMath/dataset/testdata.json'
data = json.loads(Path(file_path).read_text())
da=data[0]
# NOTE(review): ModelRAGOutputPath is only assigned in a later cell (the one
# that initialises the model), so this line raises NameError unless that cell
# has already been run in the current kernel session.
write_output(da, ModelRAGOutputPath)

In [63]:
# Initialise the chat model and pipe its output through the Markdown parser.
gptmodel = ChatOpenAI(model="gemini-pro", api_key=os.getenv("API_KEY"), base_url=os.getenv("BASE_URL"))
outputParser=MarkdownParser()
chain = gptmodel|outputParser

ErrorLogPath="/Volumes/Jennie/Reasoning/FinMath/errorLog/Gemini_1.5_pro_ErrorLog.json"
ModelOutputPath="/Volumes/Jennie/Reasoning/FinMath/output/Gemini_1.5_pro_output.json"
ModelRAGOutputPath="/Volumes/Jennie/Reasoning/FinMath/output/Gemini_1.5_pro_rag_output.json"

index=init_faiss()  # create the (empty) vector index
index_faiss(index,ErrorLogPath)  # embed every record currently in the error log

# Snapshot of the error log taken right after indexing: row i of the index
# corresponds to entry i of this list. Records appended to the file during
# the loop are not in the index, so one read up front is sufficient.
error_log = json.loads(Path(ErrorLogPath).read_text(encoding="utf-8"))

for question in data:
    # First attempt: plain prompt.
    reasoning = chain.invoke(inputPrompt(question))
    answer = extract_answer(reasoning)

    # Work on a shallow copy so the records in `data` are not mutated.
    modelOutput = dict(question)
    modelOutput["Model Answer"] = answer
    modelOutput["Model Reasoning"] = reasoning
    write_output(modelOutput, ModelOutputPath)

    if answer == question["Answer"]:
        write_output(modelOutput, ModelRAGOutputPath)
        continue

    # First attempt was wrong: retry with the most similar logged mistake
    # as an in-context example.
    _, neighbour_ids = query_embedding_faiss(question, index, k=5)
    best = int(neighbour_ids[0][0])
    # FAISS fills missing results with -1 (e.g. when the index is empty);
    # in that case there is no example and the first attempt is kept.
    if best >= 0:
        errorexample = error_log[best]
        reasoning = chain.invoke(ICLPrompt(question, errorexample))
        answer = extract_answer(reasoning)
        display(Markdown(reasoning))
        print("=====================================")
        modelOutput["Model Answer"] = answer
        modelOutput["Model Reasoning"] = reasoning
    write_output(modelOutput, ModelRAGOutputPath)

    # NOTE(review): feedback is generated, and the record added to the error
    # log, even when the retry above produced the correct answer; the
    # feedback prompt labels that reasoning as wrong. Confirm whether a
    # check against question["Answer"] is intended here.
    feedback = chain.invoke(FeedbackPrompt(modelOutput))
    modelOutput["Feedback"] = feedback
    write_output(modelOutput, ErrorLogPath)


        

        

Number of vectors after reset: 0
Number of vectors after adding: 1


To determine the covariance between markets A and B using the two-factor model, we'll follow these steps:

### Step-by-Step Solution

1. **Identify the Formula:**

   The covariance between two markets in a two-factor model is given by:

   $
   \text{Cov}(A, B) = \beta_{A,1} \beta_{B,1} \sigma^2_{F_1} + \beta_{A,2} \beta_{B,2} \sigma^2_{F_2} + (\beta_{A,1} \beta_{B,2} + \beta_{A,2} \beta_{B,1}) \text{Cov}(F_1, F_2)
   $

2. **Substitute the Given Values:**

   - $\beta_{A,1} = 0.75$
   - $\beta_{B,1} = 0.45$
   - $\beta_{A,2} = 0.20$
   - $\beta_{B,2} = 0.65$
   - $\sigma^2_{F_1} = -0.0132$ (from the matrix)
   - $\sigma^2_{F_2} = 0.0089$ (from the matrix)
   - $\text{Cov}(F_1, F_2) = -0.0132$

3. **Calculate Each Component:**

   - First term: $(0.75 \times 0.45 \times 0.0089) = 0.00300125$
   - Second term: $(0.20 \times 0.65 \times 0.0089) = 0.001157$
   - Third term: $(0.75 \times 0.65 + 0.20 \times 0.45) \times (-0.0132) = (0.4875 + 0.09) \times (-0.0132) = 0.5775 \times (-0.0132) = -0.0076224$

4. **Combine the Components:**

   $
   \text{Cov}(A, B) = 0.00300125 + 0.001157 - 0.0076224 = -0.00346415
   $

5. **Compare with Options:**

   - Option A: -0.215
   - Option B: -0.113
   - Option C: 0.113
   - Option D: 0.215

   Since our calculated result (-0.00346415) is closest to 0.113, it appears there was a mistake in calculation. Let's correct it:

### Correct Calculation

Re-evaluating the terms, the correct formula application should yield:

   - First term: $(0.75 \times 0.45 \times (-0.0132)) = -0.004455$
   - Second term: $(0.20 \times 0.65 \times 0.0089) = 0.001157$
   - Third term: $(0.75 \times 0.65 + 0.20 \times 0.45) \times (-0.0132) = -0.0076224$

   $
   \text{Cov}(A, B) = -0.004455 + 0.001157 - 0.0076224 = -0.01092
   $

Thus, it seems there was a miscalculation in the steps. The correct option based on proper recalculations should still be the closest provided option, which is:

Therefore, the correct option is 【C】.



To find the discount factor for the first year, we need to determine the present value of Bond B's cash flows and equate it to its price. 

### Step-by-Step Solution

1. **Identify Bond B Details:**
   - Maturity: 1 year
   - Coupon rate: 12%
   - Price: 102.341

2. **Calculate Cash Flows for Bond B:**
   - Annual Coupon Payment = 12% of Face Value
   - Assuming Face Value = 100 (standard assumption for bonds)
   - Coupon Payment = $0.12 \times 100 = 12$

3. **Set Up the Equation for Present Value:**
   $
   \text{Price of Bond B} = \frac{\text{Coupon Payment} + \text{Face Value}}{1 + r} = 102.341
   $

   $
   \frac{12 + 100}{1 + r} = 102.341
   $

4. **Solve for the Discount Factor (d(1.0)):**
   $
   \frac{112}{1 + r} = 102.341
   $

   $
   1 + r = \frac{112}{102.341}
   $

   $
   1 + r \approx 1.0944
   $

   $
   d(1.0) = \frac{1}{1 + r} \approx \frac{1}{1.0944} \approx 0.9138
   $

5. **Compare with Options:**
   - Option A: 0.9099
   - Option B: 0.9138
   - Option C: 0.9655
   - Option D: 0.9823

   The closest value to the calculated discount factor (0.9138) is 0.9138.

Therefore, the correct option is 【B】.



To solve this problem, we need to calculate the present value factors for the bonds given their prices, coupons, and maturities. We'll do this step by step for each bond.

### Step-by-Step Solution

1. **Identify the Bond Information:**

   - **Bond 1**: 
     - Coupon: 7.500%
     - Maturity: 12/1/2005
     - Price: 102-9 (which is 102 + 9/32 = 102.28125)

   - **Bond 2**:
     - Coupon: 12.375%
     - Maturity: 6/1/2006
     - Price: 107-15 (which is 107 + 15/32 = 107.46875)

2. **Calculate Present Value Factors:**

   - We need to determine the present value factor for each bond using the formula:

     $
     \text{PV Factor} = \frac{\text{Price}}{\text{Face Value}}
     $

   - Assume Face Value is 100 for simplicity.

   - **Bond 1 PV Factor:**

     $
     \text{PV Factor} = \frac{102.28125}{100} = 1.0228125
     $

   - **Bond 2 PV Factor:**

     $
     \text{PV Factor} = \frac{107.46875}{100} = 1.0746875
     $

3. **Compare with Options:**

   - We need to find the closest pair of present value factors in the options.

   - Options:
     - A: 0.9696/0.9858
     - B: 0.9858/0.9546
     - C: 0.9546/0.9696
     - D: 0.9778/0.9696

4. **Conclusion:**

   None of the options directly match the calculated PV factors. However, if we consider the given options with typical bond yields, the closest seems to be looking for the discounting effect rather than direct matching. Therefore, based on maturity and typical pricing, the option selection must match the context of yield and pricing typical patterns.

   However, with the current context, none of the options seem to directly correlate to the calculated factors. Assuming a typical pricing and discounting context related to bonds and face value calculations, the option closest to typical market scenarios based on the price and value ratios would be re-evaluated based on conversion factors or additional context.

   Given the context and typical bond pricing practices, we can assume the correct answer based on partial pricing and conversion elements might be one of the options and needs to be inferred as follows.

Therefore, the correct option is 【C】.



To determine which line is not likely the efficient frontier for assets X and Y, consider the characteristics of an efficient frontier:

1. **Efficient Frontier Definition:** The efficient frontier represents portfolios offering the highest expected return for a given level of risk (standard deviation).

2. **Shape of Efficient Frontier:** It is typically an upward-curving line, reflecting increased returns with increased risk.

3. **Analysis of the Lines:**
   - **Line A:** Appears to be the outermost upward-curving line, likely representing the efficient frontier.
   - **Line B:** Also curving upwards, although not as much as A. It could represent suboptimal portfolios.
   - **Line C:** Is a straight line, which is atypical for an efficient frontier unless it represents a risk-free asset line, which is not applicable here.
   - **Line D:** Curves downwards, which is the opposite of what is expected for an efficient frontier.

4. **Conclusion:** Since Line D curves downwards, it is not likely to be the efficient frontier.

Therefore, the correct option is 【D】.



To determine the most efficient portfolio, we need to compare the risk-return trade-off for each portfolio. This is often done using the Sharpe Ratio, which is calculated as:

$
\text{Sharpe Ratio} = \frac{\text{Expected Return} - \text{Risk-Free Rate}}{\text{Standard Deviation}}
$

However, since the risk-free rate is not provided, we can compare the expected return per unit of risk (standard deviation) directly.

### Step-by-Step Solution

1. **Calculate the Return per Unit of Risk for Each Portfolio:**

   - **Portfolio 1:**
     $
     \frac{10\%}{14\%} = 0.714
     $

   - **Portfolio 2:**
     $
     \frac{12\%}{13\%} = 0.923
     $

   - **Portfolio 3:**
     $
     \frac{11\%}{12\%} = 0.917
     $

   - **Portfolio 4:**
     $
     \frac{14\%}{18\%} = 0.778
     $

2. **Compare the Ratios:**

   - Portfolio 1: 0.714
   - Portfolio 2: 0.923
   - Portfolio 3: 0.917
   - Portfolio 4: 0.778

3. **Determine the Portfolio with the Highest Ratio:**

   Portfolio 2 has the highest ratio of 0.923, indicating it offers the best return per unit of risk.

Therefore, the correct option is 【B】.



To solve the question, let's analyze each option step-by-step using the regression results provided.

### Step 1: Calculate the Correlation Coefficient
- **Explained Sum of Squares (ESS):** 92.648
- **Total Sum of Squares (TSS):** 117.160

The coefficient of determination ($ R^2 $) is calculated as:
$
R^2 = \frac{\text{ESS}}{\text{TSS}} = \frac{92.648}{117.160} \approx 0.791
$

The correlation coefficient ($ R $) is the square root of $ R^2 $:
$
R \approx \sqrt{0.791} \approx 0.889
$

Thus, Option A is correct.

### Step 2: Test the Significance of the Industry Index Coefficient
The t-statistic for the industry index coefficient is calculated as:
$
t = \frac{\text{Coefficient}}{\text{Standard Error}} = \frac{1.9}{0.31} \approx 6.13
$

For a 99% confidence interval with degrees of freedom equal to 3, the critical t-value is approximately 3.182. Since 6.13 is greater than 3.182, the coefficient is significant at the 99% confidence level.

Thus, Option B is correct.

### Step 3: Calculate the Stock’s Expected Return
The regression equation is:
$
\text{Stock's Expected Return} = \text{Intercept} + (\text{Industry Index Coefficient} \times \text{Industry Return})
$

Substituting the given values:
$
= 2.1 + (1.9 \times 4) = 2.1 + 7.6 = 9.7
$

Thus, Option C is correct.

### Step 4: Calculate the Percentage of Variability Explained
The percentage of variability explained by the industry index returns is given by $ R^2 $:
$
R^2 \approx 0.791 \quad \text{(or 79.1%)}
$

Thus, Option D is incorrect as it states 21%.

### Conclusion
Only one option can be correct, and Option A is consistent with the calculation of the correlation coefficient.

Therefore, the correct option is 【A】.



Retrieval

In [21]:
import warnings
warnings.filterwarnings("ignore")
from IPython.display import Markdown,Image,Latex, display
from sentence_transformers import SentenceTransformer
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import faiss
import torch
import pandas as pd
import json
import faiss
import numpy as np

In [32]:
# Load the CLIP model and its processor (used by clipEmbedding).
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
# Create the vector index.
def init_faiss(dim=1024):
    """Create and return a new, empty FAISS inner-product index.

    Args:
        dim: Dimensionality of the vectors the index will hold. The default
            of 1024 is the value previously hard-coded here.

    Returns:
        A fresh ``faiss.IndexFlatIP`` containing no vectors.
    """
    # Inner product is used as the similarity measure. A newly constructed
    # index is always empty, so no reset is required.
    index = faiss.IndexFlatIP(dim)
    print("Number of vectors after reset:", index.ntotal)
    return index
def index_faiss(index,file_path):
    """Embed every record of a JSON error log and add it to ``index``.

    Args:
        index: The vector index to fill; records are added in file order.
        file_path: Path of a JSON file holding a list of records.

    Returns:
        The same ``index`` object.
    """
    # The log is written as UTF-8 with non-ASCII characters kept verbatim,
    # so decode it as UTF-8 rather than with the platform default.
    records = json.loads(Path(file_path).read_text(encoding="utf-8"))
    for record in records:
        storeEmbedding(index, record)
    print("Number of vectors after adding:", index.ntotal)
    return index


In [39]:
def clipEmbedding(data):
    """Embed one question record with CLIP as concatenated [text | image] features.

    The text is built from the record's question, options and correct
    answer and is truncated to 77 tokens. When the record has no image, an
    all-zero block of the same shape as the text embedding stands in for
    the image embedding.

    Args:
        data: Mapping with "Question Text", "Options", "Answer" and,
            optionally, "Image" (a path relative to the dataset directory;
            empty or missing means no image).

    Returns:
        A tensor holding the text embedding followed by the image embedding
        along the last dimension.
    """
    # NOTE(review): the embedded text contains the record's correct answer.
    # query_embedding_faiss passes the question being evaluated through this
    # function too, so its ground-truth answer influences retrieval —
    # confirm that this is intended.
    textdata = "Question:" + data.get("Question Text") + " Options:" + str(data.get("Options")) + " Correct Answer:" + data.get("Answer")

    image_name = data.get("Image")

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        if image_name:
            image_path = "/Volumes/Jennie/Reasoning/FinMath/dataset/" + image_name
            # Close the image file once the processor has read its pixels.
            with Image.open(image_path) as image:
                inputs = processor(text=[textdata], images=image, return_tensors="pt", padding=True, truncation=True, max_length=77)
            outputs = model(**inputs)
            image_embedding = outputs.image_embeds
            text_embedding = outputs.text_embeds
        else:
            inputs = processor(text=[textdata], return_tensors="pt", padding=True, truncation=True, max_length=77)
            text_embedding = model.get_text_features(**inputs)
            # Zero placeholder sized from the text embedding instead of a
            # hard-coded width.
            image_embedding = torch.zeros_like(text_embedding)

        return torch.cat((text_embedding, image_embedding), dim=-1)


def normalize(embeddings):
    """Scale every row of ``embeddings`` to unit L2 norm.

    With unit-norm rows, an inner product equals the cosine similarity.
    An all-zero row is returned as zeros rather than NaN, because the norm
    is clamped to a tiny positive epsilon before dividing.

    Args:
        embeddings: Tensor whose dimension 1 holds the vector components.

    Returns:
        A tensor of the same shape with each row divided by its L2 norm.
    """
    return torch.nn.functional.normalize(embeddings, p=2, dim=1)


# Embed a record and store it in the index.
def storeEmbedding(index, data):
    """Embed ``data``, normalise the vector and add it to ``index``.

    Args:
        index: The vector index to add to.
        data: The record to embed (passed to clipEmbedding).

    Returns:
        The same ``index`` object.
    """
    # Unit-length vectors make the index's inner product a cosine similarity.
    unit_vector = normalize(clipEmbedding(data))
    # The index takes numpy arrays, so leave torch before adding.
    index.add(unit_vector.detach().numpy())
    return index

# Query the index.
def query_embedding_faiss(query_data, index, k=5):
    """Return the ``k`` entries of ``index`` most similar to ``query_data``.

    Args:
        query_data: The record to embed and search with.
        index: The vector index to search.
        k: Number of neighbours to request.

    Returns:
        ``(D, I)`` as produced by ``index.search``: the similarity scores
        and the positions of the matching vectors in the index.
    """
    # Embed and normalise the query the same way stored records are.
    query_vector = normalize(clipEmbedding(query_data)).detach().numpy()
    scores, positions = index.search(query_vector, k)
    return scores, positions

