In [13]:
import base64
import io
import json
import os
import warnings

import pandas as pd
from IPython.display import Markdown, display
from PIL import Image


In [7]:
# Locations of the question images and of the JSON file that describes the questions
image_folder = 'data/images/Foundation_of_risk_management'
json_file_path = 'data/Foundationofriskmanagement.json'

# Load the JSON file.
# The encoding is given explicitly: JSON text is UTF-8 by specification, while
# open() would otherwise fall back to the platform's locale encoding.
with open(json_file_path, 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Define a function to load the image based on the image path in the JSON
def process_image(image_path):
    """
    Load the image stored at `image_path`.

    Parameters:
    - image_path: Path of the image file to open.

    Returns:
    - The fully loaded PIL image, or None when the file does not exist or
      cannot be opened/decoded.
    """
    try:
        if not os.path.exists(image_path):
            return None  # Missing file: the caller treats this as "no image"

        image = Image.open(image_path)
        # Image.open() only reads the header and keeps the file open. Forcing the
        # decode here surfaces corrupt files inside this try block and lets Pillow
        # release the file handle instead of holding one per loaded question.
        image.load()
        return image
    except Exception as exc:
        # Still best-effort (a bad image must not abort the whole data load),
        # but report the failure instead of hiding it.
        warnings.warn(f"Could not load image {image_path!r}: {exc}")
        return None

# Flatten every JSON entry into one record (one future DataFrame row)
data_list = []
for key, item in data.items():
    question = item["question"]
    image_path = question.get("image", "")

    # Only the file name of the JSON image path is used; it is looked up inside
    # image_folder. Entries without an image path get no image.
    pos_image = (
        process_image(os.path.join(image_folder, os.path.basename(image_path)))
        if image_path
        else None
    )

    record = {
        "ID": key,
        "question": question["text"],
        "image": image_path,      # original path exactly as written in the JSON
        "pos_image": pos_image,   # loaded image object, or None
        "options": question.get("Options", []),
    }
    # The remaining fields are copied as-is and default to an empty string.
    for field in (
        "answer",
        "analysis",
        "QA type",
        "knowledge topics",
        "book label",
        "level of difficulty",
        "question type",
    ):
        record[field] = item.get(field, "")

    data_list.append(record)

# One row per question, columns in the order the record keys were inserted
df = pd.DataFrame(data_list)


In [8]:
# Preview rows 11-14 (positional slice) of the assembled DataFrame
df.iloc[11:15]

Unnamed: 0,ID,question,image,pos_image,options,answer,analysis,QA type,knowledge topics,book label,level of difficulty,question type
11,13,Consider the following three well-diversified ...,/foundation_of_risk_management/13_table.png,<PIL.PngImagePlugin.PngImageFile image mode=RG...,"[A. No, all three well-diversified portfolios ...",C,There is no arbitrage opportunity if all three...,math reasoning QA,"arbitrage opportunity, portfolio management, T...",foundation of risk management,easy,text+image
12,14,Which component is NOT in the APT model?,,,"[A. Factor exposure, B. Factor return, C. Fact...",C,APT gives expected return (first moment) as a ...,knowledge reasoning QA,"APT model, factor returns, idiosyncratic return",foundation of risk management,easy,text only
13,15,Melody Li is a junior risk analyst who has rec...,,,[A. Purchasing an insurance policy is an examp...,B,The complexity of derivatives pricing means th...,knowledge reasoning QA,"hedging, derivatives, financial distress, disc...",foundation of risk management,easy,text only
14,16,Which of the following statements regarding co...,,,[A. Management of the organization is ultimate...,B,The Board of Directors is ultimately responsib...,knowledge reasoning QA,"corporate risk governance, risk oversight, ris...",foundation of risk management,easy,text only


In [12]:
from openai import OpenAI

# SECURITY: credentials must never be stored in the notebook. The key is read
# from the environment; any key that was previously committed in this cell
# should be treated as leaked and revoked.
_api_key = os.environ.get("OPENAI_API_KEY")
if not _api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this cell."
    )

# The endpoint can be overridden with OPENAI_BASE_URL; the default is the
# gateway this notebook has been using.
client = OpenAI(
    api_key=_api_key,
    base_url=os.environ.get("OPENAI_BASE_URL", "https://api.xi-ai.cn/v1"),
)

def gen_answer(question, image, options):
    """
    Ask the chat model for an answer to a question followed by a step-by-step analysis.

    Parameters:
    - question: The text of the question.
    - image: A PIL image object that belongs to the question, or None.
    - options: A list of answer options (None is treated as an empty list).

    Returns:
    - The text content of the model's first reply.
    """
    # Format the fields explicitly. Interpolating the tuple (question, image, options)
    # would put a Python tuple repr (including the image object's repr) in the prompt.
    option_block = "\n".join(str(option) for option in (options or []))
    prompt = (
        "First give the answer to the following question, "
        "then show the analysis step by step.\n\n"
        f"Question:\n{question}\n\n"
        f"Options:\n{option_block}"
    )

    user_content = prompt
    if image is not None:
        # Attach the picture itself as a base64 PNG data URL so the model can read it.
        # Modes PNG cannot store are converted to RGB first.
        png_source = (
            image if image.mode in ("1", "L", "LA", "P", "RGB", "RGBA") else image.convert("RGB")
        )
        buffer = io.BytesIO()
        png_source.save(buffer, format="PNG")
        data_url = "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode("ascii")
        user_content = [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": data_url}},
        ]

    completion = client.chat.completions.create(
        model="gpt-4o",
        stream=False,
        messages=[
            {"role": "system", "content": "you are a professor in finance department."},
            {"role": "user", "content": user_content},
        ],
    )
    return completion.choices[0].message.content

In [None]:
# prompt: have GPT generate the explanation in Markdown format; write mathematical formulas and symbols as inline or block-level formulas

def load_image(image_object):
    """
    Pass an already-loaded image through unchanged.

    Parameters:
    - image_object: A PIL image object.

    Returns:
    - The very same object that was passed in; no copy or conversion is made.
    """
    loaded_image = image_object
    return loaded_image

def process_image(image):
    """
    Stand-in for a real image-processing step (for example OCR).

    The argument is not inspected; a fixed confirmation string is returned.
    """
    # NOTE(review): this definition reuses the name of the path-based image loader
    # defined earlier in the notebook. Once this cell has run, re-running the
    # data-loading cell would store this status string instead of an image.
    status_message = "Image has been successfully processed."
    return status_message

def _format_option_lines(options):
    """Return one 'A. text' line per option without doubling a label that is already there."""
    lines = []
    for position, option in enumerate(options or []):
        label = f"{chr(65 + position)}."
        text = str(option).strip()
        lines.append(text if text.startswith(label) else f"{label} {text}")
    return lines


def _image_to_data_url(image):
    """Encode a PIL image as a base64 PNG data URL."""
    # Modes PNG cannot store are converted to RGB before saving.
    if image.mode not in ("1", "L", "LA", "P", "RGB", "RGBA"):
        image = image.convert("RGB")
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    return f"data:image/png;base64,{encoded}"


def get_gpt4o_response(question, image, options):
    """
    Ask the gpt-4o chat model for the answer to a question plus a Markdown explanation.

    Parameters:
    - question: The text of the question.
    - image: The PIL image object associated with the question, or None.
    - options: A list of answer options. Options that already start with their
      letter label ("A.", "B.", ...) are used as-is; others get a label added.

    Returns:
    - The text content of the model's first reply (answer and explanation in a
      single string).
    """
    has_image = image is not None
    # The picture itself is attached to the request below; the prompt only says
    # whether there is one.
    image_note = "See the attached image." if has_image else "No image provided."

    # The prompt is assembled line by line so that no source-code indentation
    # leaks into it (indented lines are code blocks in Markdown, not headings).
    prompt = "\n".join([
        "You are a financial expert. Given the following question, image, and options, "
        "provide the correct answer and detailed step-by-step explanation. The explanation "
        "should be formatted in Markdown, and all mathematical expressions should use LaTeX "
        "format, enclosed in `$$` for block-level rendering. The steps should be presented "
        "clearly, with appropriate formatting and structure, as demonstrated in the "
        "assistant's responses.",
        "",
        "Ensure the output is:",
        "",
        "1. Clearly structured.",
        "2. Steps are numbered where necessary.",
        "3. All equations are rendered in LaTeX and formatted for Markdown.",
        "",
        "### Question:",
        str(question),
        "",
        "### Image Description:",
        image_note,
        "",
        "### Options:",
        *_format_option_lines(options),
        "",
        "Please provide the correct answer and a detailed explanation.",
    ])

    if has_image:
        # Multi-part user message: the text prompt plus the image as a data URL.
        user_content = [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": _image_to_data_url(image)}},
        ]
    else:
        user_content = prompt

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are an expert financial assistant."},
            {"role": "user", "content": user_content},
        ],
    )

    return response.choices[0].message.content


# Example usage with your dataframe data:
# Assuming `df` is your dataframe
for index, row in df[20:30].iterrows():
    # Pull the question, its image and its options out of the row
    question = row['question']
    image = row['pos_image']  # a PIL image object, or None when the question has no image
    options = row['options']
    answer = row['answer']      # reference answer
    analysis = row['analysis']  # reference analysis

    # Load the image (if there is one)
    processed_image = load_image(image) if image is not None else None

    # Ask the model for its answer and explanation
    response = get_gpt4o_response(question, processed_image, options)

    # Show the question
    print(f"Question {index}: {question}")

    # Show the image itself; the header is skipped when there is nothing to show
    if image is not None:
        print(f"Image associated with question {index}:")
        display(image)

    # Show the options. A letter label is added only when the option text does
    # not already start with it, so "A. ..." is not printed as "A. A. ...".
    print(f"Options for question {index}:")
    for i, option in enumerate(options):
        label = f"{chr(65 + i)}."
        text = str(option).strip()
        print(text if text.startswith(label) else f"{label} {text}")

    # Show the reference answer
    print(f"Correct Answer for question {index}: {answer}")

    # Render the reference analysis as Markdown (LaTeX formulas included). The blank
    # line after the bold heading keeps the body in its own paragraph, so lists,
    # headings or $$ blocks at its start render correctly.
    display(Markdown(f"**Analysis for question {index}:**\n\n{analysis}"))

    # Render the model's answer and explanation as Markdown
    display(Markdown(f"**GPT-4o Response for question {index}:**\n\n{response}"))


