In [5]:
import json
import random
import os
import base64
import requests
import pandas as pd
import re

# Path to the dataset JSON file. Set the FINMATH_DATA_FILE environment variable
# to point at a different copy; otherwise the original local path is used.
file_path = os.environ.get(
    'FINMATH_DATA_FILE',
    '/Volumes/Jennie/Reasoning/FinMath/dataset/multiDemo.json',
)

In [28]:
# Load the "text + image" questions from the dataset file
def get_image_questions(file_path, num):
    """Return up to ``num`` questions whose "Question Type" is text+image.

    Args:
        file_path: Path to a UTF-8 JSON file containing a list of question
            dicts.
        num: Maximum number of questions to return.

    Returns:
        A list of question dicts. When ``num`` or fewer questions match, all
        matching questions are returned in file order; otherwise ``num`` of
        them are sampled with a fixed seed, so repeated calls return the same
        selection.
    """
    # Load the dataset
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Keep only the questions tagged as text + image (both spellings occur)
    text_image_questions = [
        item for item in data
        if item.get("Question Type") in ("text+image", "text + image")
    ]

    # Fewer matches than requested: return all of them
    if len(text_image_questions) <= num:
        return text_image_questions

    # A private generator seeded with 42 draws the same sample that
    # `random.seed(42); random.sample(...)` would, but leaves the global
    # random state untouched for the rest of the notebook.
    return random.Random(42).sample(text_image_questions, num)


# Encode each question's images as base64
def encode_images_to_base64(result, dataset_dir="../dataset/"):
    """Base64-encode the image files referenced by each question.

    Args:
        result: Iterable of question dicts. The "Image" value is a path
            relative to ``dataset_dir`` (or empty/missing). The "Share Image"
            value may be a single relative path, a list of relative paths,
            or empty/missing.
        dataset_dir: Directory that the relative image paths are resolved
            against. Defaults to the location the notebook has always used.

    Returns:
        A dict keyed by the question's position in ``result``. Each value is
        ``{"image": <base64 str or None>, "share_image": [<base64 str>, ...]}``.
        Files that cannot be read are reported on stdout and skipped, so
        ``share_image`` only ever contains successfully encoded images and is
        an empty list when there are none.
    """
    base64_images = {}

    for idx, item in enumerate(result):
        entry = {
            "image": None,
            "share_image": [],
        }
        base64_images[idx] = entry

        # Question image: a missing key (None) is treated like an empty path
        image_path = item.get("Image")
        if isinstance(image_path, str) and image_path:
            entry["image"] = _encode_image_file(dataset_dir, image_path)
        else:
            print(f"Skipping image, as it is empty or missing")

        # Shared images: accept a single path as well as a list of paths, so a
        # string value is never iterated character by character.
        share_image_paths = item.get("Share Image") or []
        if isinstance(share_image_paths, str):
            share_image_paths = [share_image_paths]
        share_image_paths = [p for p in share_image_paths if isinstance(p, str) and p]

        if share_image_paths:
            for share_image_path in share_image_paths:
                encoded = _encode_image_file(dataset_dir, share_image_path)
                if encoded is not None:
                    entry["share_image"].append(encoded)
        else:
            print(f"Skipping share image ")

    return base64_images


def _encode_image_file(dataset_dir, relative_path):
    """Read ``relative_path`` under ``dataset_dir``; return base64 text or None on I/O error."""
    # Resolve against the dataset directory *before* touching the filesystem;
    # checking the bare relative path would look in the wrong place.
    full_path = os.path.join(dataset_dir, relative_path)
    try:
        with open(full_path, "rb") as image_file:
            base64_string = base64.b64encode(image_file.read()).decode('utf-8')
    except OSError as e:
        print(f"Error converting {full_path}: {e}")
        return None
    print(f"Successfully encoded {full_path}")
    return base64_string


# Call the chat-completions API
def process_and_send_images(
    result,
    base64_encoded_images,
    api_key,
    api_url="https://api.xi-ai.cn/v1/chat/completions",
    model="gpt-4o",
    max_tokens=1000,
    timeout=120,
    session=None,
):
    """Send every question, with its encoded images, to the chat API.

    Args:
        result: List of question dicts; "Share Context", "Question Text" and
            "Options" are interpolated into the prompt.
        base64_encoded_images: Dict as produced by ``encode_images_to_base64``,
            i.e. values shaped like ``{"image": str | None, "share_image": list}``.
            Values are paired with ``result`` in iteration order.
        api_key: Bearer token placed in the ``Authorization`` header.
        api_url: Chat-completions endpoint to POST to.
        model: Model name placed in the request payload.
        max_tokens: ``max_tokens`` value placed in the request payload.
        timeout: Timeout in seconds passed to the HTTP call.
        session: Optional object exposing
            ``post(url, headers=..., json=..., timeout=...)`` (for example a
            ``requests.Session``). The ``requests`` module is used when omitted.

    Returns:
        Dict mapping the question position to ``{"response_content": str}``.
        The content is ``"No valid response"`` when the request fails, the
        body is not JSON, or the body has no choices.
    """
    results = {}

    # Request headers shared by every call
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    http = session if session is not None else requests

    for idx, (base64_image_data, result_data) in enumerate(zip(base64_encoded_images.values(), result)):
        # Prompt for this question
        task_instruction = f"""
        You are a financial expert. You are supposed to
        answer the given question. You need to first think
        through the problem step by step, documenting each
        necessary step. Then you are required to conclude
        your response with the final answer in your last
        sentence as “Therefore, the answer is \\boxed{{}}”.

        Here is the question:
        Share_context: {result_data.get('Share Context')}
        Question: {result_data.get('Question Text')}
        Options: {result_data.get('Options')}
        
        Let's think step by step to answer the given question and put your answer in \\boxed{{}}. only one letter like "A" or "B" or "C"  is allowed in the \\boxed{{}}(e.g: \\boxed{{A}})
        """

        content = [{"type": "text", "text": task_instruction}]

        # The encoder stores images under the lowercase keys "image" and
        # "share_image"; reading the dataset-style keys ("Image" /
        # "Share Image") here would silently drop every image.
        question_image = base64_image_data.get('image')
        if question_image:
            content.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/png;base64,{question_image}"
                }
            })
            content.append({
                "type": "text",
                "text": "This image above is associated with the question text."
            })

        for idx_share, share_image in enumerate(base64_image_data.get('share_image') or []):
            if share_image:
                content.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{share_image}"
                    },
                })
                content.append({
                    "type": "text",
                    "text": f"This image above is the share_image_{idx_share + 1} associated with the share text."
                })

        messages = [{"role": "user", "content": content}]

        payload = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens
        }

        # Show what is being sent, with the base64 data truncated so the cell
        # output stays readable.
        print(f"Messages for question {idx}:")
        print(json.dumps(_truncate_image_urls(messages), indent=4))

        # A failed request or a non-JSON body is recorded as "No valid
        # response" for this question instead of aborting the whole run.
        try:
            response = http.post(api_url, headers=headers, json=payload, timeout=timeout)
            response_json = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"Request for question {idx} failed: {e}")
            response_json = None

        choices = response_json.get('choices') if isinstance(response_json, dict) else None
        if choices:
            results[idx] = {
                "response_content": choices[0]['message']['content']
            }
        else:
            results[idx] = {
                "response_content": "No valid response"
            }

        print(f"Results for question {idx}: {json.dumps(results[idx], indent=4)}")

    return results


def _truncate_image_urls(messages, keep=60):
    """Return a copy of ``messages`` whose image URLs are cut to ``keep`` characters for display."""
    preview = []
    for message in messages:
        parts = []
        for part in message["content"]:
            if part.get("type") == "image_url":
                url = part["image_url"]["url"]
                shortened = url if len(url) <= keep else url[:keep] + "..."
                part = {"type": "image_url", "image_url": {"url": shortened}}
            parts.append(part)
        preview.append({"role": message["role"], "content": parts})
    return preview


# Compute accuracy
def calculate_accuracy(final_results, result):
    """Compare each model response with the reference answer and report accuracy.

    Args:
        final_results: Dict mapping a question index to
            ``{"response_content": str}``.
        result: Sequence of question dicts indexed by the same keys; the
            reference answer is read from each dict's "Answer" entry.

    Returns:
        Accuracy as a percentage (0-100). ``0.0`` is returned when
        ``final_results`` is empty. Per-question verdicts and the final
        accuracy are also printed.
    """
    correct_count = 0
    total_count = len(final_results)

    for idx, final_result in final_results.items():
        response_content = final_result['response_content'] or ""

        # The prompt asks for the answer in the last sentence, and a response
        # may echo an empty \boxed{} from the instructions first, so take the
        # last non-empty \boxed{...} rather than the first match.
        boxed = re.findall(r'\\boxed\{([^{}]+)\}', response_content)
        response_answer = boxed[-1].strip() if boxed else None

        correct_answer = result[idx].get('Answer')
        if isinstance(correct_answer, str):
            correct_answer = correct_answer.strip()

        if response_answer == correct_answer:
            correct_count += 1
            print(f"Question {idx}: Correct (Answer: {correct_answer}, Response: {response_answer})")
        else:
            print(f"Question {idx}: Incorrect (Answer: {correct_answer}, Response: {response_answer})")

    # Guard the division so an empty run reports 0% instead of raising
    accuracy = correct_count / total_count * 100 if total_count else 0.0
    print(f"\nAccuracy: {accuracy:.2f}%")
    return accuracy

In [29]:
# Sample the questions for this run; `num` is how many questions to pick.
result = get_image_questions(file_path, num=1)
result

[{'ID': 2271,
  'Question Number': 108,
  'Share Context': '',
  'Share Image': '',
  'Question Text': 'Instead of residual-based information ratio (IR), it is also acceptable to compute information ratio (IR) based on active returns. The following table displays twelve (12) months of returns comparing a portfolio (P) to its benchmark (B); the final column shows the difference each month:',
  'Image': 'images/Foundationofriskmanagement1_images/108u.png',
  'Options': {'A': ' 0.404', 'B': ' 0.651', 'C': ' 0.950', 'D': ' 1.237'},
  'Answer': 'B',
  'Explanation': 'The annualized ex post (active-based) information ratio is calculated as follows: IR = (0.0044 × 12) / (0.0234 × √12) = 0.65137.',
  'QA Type': 'Math reasoning QA',
  'Question Type': 'text+image',
  'Level of Difficulty': 'Easy',
  'Knowledge Topics': 'information ratio, active return, ex-post performance',
  'General Topics': 'Foundation of Risk Management',
  'Book Label': 'foundation of risk management1'}]

In [30]:

base64_encoded_images = encode_images_to_base64(result)
# Preview the result; only the first 30 base64 characters of each image are shown.
for idx, content in base64_encoded_images.items():
    print(f"Question {idx}:")
    image_preview = content['image'][:30] if content['image'] else None
    print(f"Image: {image_preview}")
    share_previews = [img[:30] if img else None for img in content['share_image']]
    print(f"Share Images: {share_previews}")

Successfully encoded ../dataset/images/Foundationofriskmanagement1_images/108u.png
Question 0:
Image: iVBORw0KGgoAAAANSUhEUgAAA+AAAA
Share Images: []


In [31]:
# Read the API key from the environment rather than embedding it in the notebook.
# The key that was previously hard-coded in this cell has been exposed in the
# file and should be revoked and replaced.
api_key = os.environ.get("XI_AI_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the XI_AI_API_KEY environment variable to your API key before running this cell."
    )
final_results = process_and_send_images(result, base64_encoded_images, api_key)

Messages for question 0:
[
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "\n        You are a financial expert. You are supposed to\n        answer the given question. You need to first think\n        through the problem step by step, documenting each\n        necessary step. Then you are required to conclude\n        your response with the final answer in your last\n        sentence as \u201cTherefore, the answer is \\boxed{}\u201d.\n\n        Here is the question:\n        Share_context: \n        Question: Instead of residual-based information ratio (IR), it is also acceptable to compute information ratio (IR) based on active returns. The following table displays twelve (12) months of returns comparing a portfolio (P) to its benchmark (B); the final column shows the difference each month:\n        Options: {'A': ' 0.404', 'B': ' 0.651', 'C': ' 0.950', 'D': ' 1.237'}\n        \n        Let's think step by step

In [13]:
# Score the model responses in `final_results` against the "Answer" field of the sampled questions.
calculate_accuracy(final_results, result)

Question 0: Correct (Answer: A, Response: A)

Accuracy: 100.00%
