In [11]:
import os
import json
import base64

# Root folder for the generated OpenAI batch-input files; every task gets its own sub-folder.
base_output_dir = './output/openai-input-w-setting'
for task in ('matching', 'generation'):
    os.makedirs(os.path.join(base_output_dir, task), exist_ok=True)

In [12]:
# Function to encode the image
def encode_image(image_path):
  """Return the bytes of the file at `image_path` as a base64 string.

  The file is opened in binary mode and read in full; the base64 output is
  decoded as UTF-8 so the result is a `str`, not `bytes`.
  """
  with open(image_path, "rb") as image_file:
    raw_bytes = image_file.read()
  encoded = base64.b64encode(raw_bytes)
  return encoded.decode('utf-8')

In [13]:
# for matching
base_dir = '/home/ubuntu/MMSci/mmsci-data/'
# The figure folder and the matching-question file are both resolved against base_dir.
image_dir, input_filename = (
    os.path.join(base_dir, relative_path)
    for relative_path in (
        'benchmark/test/images/',
        'benchmark/test/image_caption_matching_data.json',
    )
)

In [14]:
import requests

# Smoke test: send one matching question straight to the chat-completions
# endpoint before building the batch files.

# Take the key from the environment so the real secret never has to be saved in
# the notebook; the original placeholder stays as the fallback.
api_key = os.environ.get('OPENAI_API_KEY', 'xxx')

headers = {
  "Content-Type": "application/json",
  "Authorization": f"Bearer {api_key}"
}

# Use a context manager so the dataset file is closed right after parsing.
with open(input_filename, 'r') as fin:
    data = json.load(fin)
# The file is indexed twice: first entry of the first top-level group.
item = data[0][0]
base64_image = encode_image(os.path.join(image_dir, item['image']))
model = 'gpt-4o'

payload = {
  "model": model,
  "messages": [
      {
      "role": "user",
      "content": [
          {
          "type": "text",
          "text": item['question'] + ' Answer only with A, B, C, or D.',
          },
          {
          "type": "image_url",
          "image_url": {
              # The image is sent inline as a data URL; the MIME type is always
              # declared as JPEG, whatever the file actually contains.
              "url": f"data:image/jpeg;base64,{base64_image}"
          }
          }
      ]
      }
  ],
  "max_tokens": 10,
  "n": 5,
  "temperature": 0.7,
}

# Without a timeout, requests waits indefinitely on a stalled connection and
# would hang the kernel.
response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers=headers,
    json=payload,
    timeout=120,
)
print(response.json())

{'id': 'chatcmpl-9XMhAhsg0xrOWt7xsInhI6IzKTFxO', 'object': 'chat.completion', 'created': 1717739092, 'model': 'gpt-4o-2024-05-13', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'B'}, 'logprobs': None, 'finish_reason': 'stop'}, {'index': 1, 'message': {'role': 'assistant', 'content': 'B'}, 'logprobs': None, 'finish_reason': 'stop'}, {'index': 2, 'message': {'role': 'assistant', 'content': 'B'}, 'logprobs': None, 'finish_reason': 'stop'}, {'index': 3, 'message': {'role': 'assistant', 'content': 'B'}, 'logprobs': None, 'finish_reason': 'stop'}, {'index': 4, 'message': {'role': 'assistant', 'content': 'B'}, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 526, 'completion_tokens': 5, 'total_tokens': 531}, 'system_fingerprint': 'fp_aa87380ac5'}


In [15]:
# for matching

def get_prompt_for_matching(question, w_cot):
    """Build the text prompt for one matching question.

    Args:
        question: The question text; it is placed first, followed by a newline.
        w_cot: When truthy, append the instruction asking the model to analyze
            the problem before answering; otherwise append the instruction to
            answer only with A, B, C, or D.

    Returns:
        The question and the chosen instruction joined by a single newline.
    """
    if w_cot:
        suffix = 'Please first thoroughly analyze and think about this problem, and then come to your final answer.'
    else:
        suffix = 'Answer only with A, B, C, or D.'
    return f'{question}\n{suffix}'

def form_openai_input_for_matching(model, w_cot):
    """Write the batch-input JSONL file for the image-caption matching task.

    Reads the dataset at the module-level `input_filename`, which is iterated
    as a list of groups (the outer index is called the "setting") whose items
    carry an 'image' file name and a 'question'. One chat-completions request
    is written per item to
    `<base_output_dir>/matching/<model>_<w|wo>-cot_<input basename>l`,
    and that path is printed.

    Args:
        model: Model name placed in every request body and in the file name.
        w_cot: Truthy to use the chain-of-thought prompt (max_tokens 1024,
            n=5); falsy for the direct-answer prompt (max_tokens 10, n=1).

    Each request's custom_id is "<setting + 1>_<idx>", where idx is the item's
    position inside its group.
    """
    # Context manager so the dataset file handle is closed after parsing.
    with open(input_filename, 'r') as fin:
        all_data = json.load(fin)

    cot_tag = 'w' if w_cot else 'wo'
    # Appending "l" to the input's base name gives the output its ".jsonl" suffix
    # when the input ends in ".json".
    output_filename = os.path.join(
        base_output_dir,
        'matching',
        f"{model}_{cot_tag}-cot_{os.path.basename(input_filename)}l",
    )
    print(output_filename)

    with open(output_filename, 'w') as fout:
        for setting, data in enumerate(all_data):
            for idx, item in enumerate(data):
                base64_image = encode_image(os.path.join(image_dir, item['image']))
                info = {
                    "custom_id": f'{setting+1}_{idx}',
                    "method": "POST",
                    "url": "/v1/chat/completions",
                    "body": {
                        "model": model,
                        "messages": [
                            {
                            "role": "user",
                            "content": [
                                {
                                "type": "text",
                                "text": get_prompt_for_matching(item['question'], w_cot),
                                },
                                {
                                "type": "image_url",
                                "image_url": {
                                    # Inline data URL; always declared as JPEG.
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                                }
                            ]
                            }
                        ],
                        "max_tokens": 1024 if w_cot else 10,
                        "n": 5 if w_cot else 1,
                        "temperature": 0.7,
                    }
                }
                # One JSON object per line.
                fout.write(json.dumps(info) + '\n')

In [16]:
# Produce one matching batch-input file per (model, chain-of-thought) combination.
for model in ('gpt-4o', 'gpt-4-turbo'):
    for w_cot in (True, False):
        form_openai_input_for_matching(model, w_cot)

./openai-input-w-setting/matching/gpt-4o_w-cot_image_caption_matching_data.jsonl
./openai-input-w-setting/matching/gpt-4o_wo-cot_image_caption_matching_data.jsonl
./openai-input-w-setting/matching/gpt-4-turbo_w-cot_image_caption_matching_data.jsonl
./openai-input-w-setting/matching/gpt-4-turbo_wo-cot_image_caption_matching_data.jsonl


In [18]:
from openai import OpenAI
# Take the key from the environment so the real secret is never saved in the
# notebook; the original placeholder stays as the fallback.
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY', 'xxx'))

In [19]:
def create_batch_job(filename):
    """Upload a batch-input file and start a batch job for it.

    Uses the module-level `client`. The uploaded file's id and the new job's
    id are printed.

    Args:
        filename: Path of the batch-input file to upload. Its base name is
            stored as the job's "description" metadata.

    Returns:
        The object returned by `client.batches.create`, so callers can keep
        the job id instead of copying it from the printed output.
    """
    print(f"Creating batch job for {filename}")

    # upload input file to openai server; the context manager closes the local
    # handle once the upload call returns
    with open(filename, "rb") as fin:
        batch_input_file = client.files.create(
            file=fin,
            purpose="batch"
        )

    batch_input_file_id = batch_input_file.id
    print(batch_input_file_id)

    # create batch job
    job = client.batches.create(
        input_file_id=batch_input_file_id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={
            "description": os.path.basename(filename)
        }
    )
    print(f"Batch job ID:\t{job.id}")
    return job

In [17]:
# for captioning
base_dir = '/home/ubuntu/MMSci/mmsci-data/'
# The figure folder and the caption-generation file are both resolved against base_dir.
image_dir, input_filename = (
    os.path.join(base_dir, relative_path)
    for relative_path in (
        'benchmark/test/images/',
        'benchmark/test/image_caption_generation_data.json',
    )
)

In [31]:
# for captioning

import tiktoken
from tqdm import tqdm

# Tokenizer used to measure and clip prompt length.
embedding_encoding = "cl100k_base"
encoding = tiktoken.get_encoding(embedding_encoding)
# Largest number of tokens a prompt may keep after clipping.
max_tokens = 125_000


def truncate(x, max_len=max_tokens):
    """Clip text `x` to at most `max_len` tokens.

    The text is tokenized with the module-level `encoding`, the leading
    `max_len` tokens are kept, and those tokens are decoded back to a string.
    The default for `max_len` is the value `max_tokens` had when this function
    was defined.
    """
    all_tokens = encoding.encode(x)
    kept_tokens = all_tokens[:max_len]
    return encoding.decode(kept_tokens)


def format_prompt(abstract, content, with_abstract, with_content):
    """Build the captioning prompt, clipped to `max_tokens` tokens.

    Args:
        abstract: Abstract text; included only when `with_abstract` is truthy.
        content: Article body text; included only when `with_content` is truthy.
        with_abstract: Whether to add the abstract section.
        with_content: Whether to add the content section.

    Returns:
        The instruction line followed by the selected sections, passed through
        `truncate`.
    """
    # NOTE(review): both sections are introduced by "Article:" -- confirm the
    # abstract is not meant to carry its own label.
    sections = []
    if with_abstract:
        sections.append(f'Article:\n{abstract}\n')
    if with_content:
        sections.append(f'Article:\n{content}\n')
    input_content = ''.join(sections)
    instruction = 'Please write a detailed description of the given scientific figure based on the following content:\n'
    return truncate(instruction + input_content, max_tokens)


def form_openai_input_for_captioning(model, with_abstract, with_content):
    """Write the batch-input JSONL file for the figure-captioning task.

    Reads the dataset at the module-level `input_filename`, iterated as a list
    of items carrying 'image', 'abstract' and 'content'. One chat-completions
    request is written per item to
    `<base_output_dir>/generation/<model>_<tag>_<input basename>l`,
    where the tag is "<w|wo>_abstract_<w|wo>_content".

    Args:
        model: Model name placed in every request body and in the file name.
        with_abstract: Whether the prompt includes the item's abstract.
        with_content: Whether the prompt includes the item's content.

    Each request's custom_id is the item's position in the dataset, as a string.
    """
    tag = f'{"wo" if not with_abstract else "w"}_abstract_{"wo" if not with_content else "w"}_content'

    # Context manager so the dataset file handle is closed after parsing.
    with open(input_filename, 'r') as fin:
        data = json.load(fin)
    # Appending "l" to the input's base name gives the output its ".jsonl" suffix
    # when the input ends in ".json".
    output_filename = os.path.join(
        base_output_dir,
        'generation',
        f"{model}_{tag}_{os.path.basename(input_filename)}l",
    )

    with open(output_filename, 'w') as fout:
        # total= lets tqdm show a full progress bar without copying the
        # enumeration into a list first.
        for idx, item in tqdm(enumerate(data), total=len(data), desc='prepare data'):
            base64_image = encode_image(os.path.join(image_dir, item['image']))
            info = {
                "custom_id": str(idx),
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": model,
                    "messages": [
                        {
                        "role": "user",
                        "content": [
                            {
                            "type": "text",
                            "text": format_prompt(item['abstract'], item['content'], with_abstract, with_content),
                            },
                            {
                            "type": "image_url",
                            "image_url": {
                                # Inline data URL; always declared as JPEG.
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            }
                            }
                        ]
                        }
                    ],
                    "max_tokens": 1024,
                    }
            }
            # One JSON object per line.
            fout.write(json.dumps(info) + '\n')


In [32]:
# Produce captioning inputs for every model and context combination except the
# one that supplies both the abstract and the full content.
for model in ('gpt-4o', 'gpt-4-turbo'):
    for with_abstract in (True, False):
        for with_content in (True, False):
            if not (with_abstract and with_content):
                form_openai_input_for_captioning(model, with_abstract, with_content)

prepare data: 100%|██████████| 1281/1281 [00:01<00:00, 885.92it/s]
prepare data: 100%|██████████| 1281/1281 [00:13<00:00, 93.08it/s]
prepare data: 100%|██████████| 1281/1281 [00:01<00:00, 1124.11it/s]
prepare data: 100%|██████████| 1281/1281 [00:01<00:00, 884.44it/s]
prepare data: 100%|██████████| 1281/1281 [00:13<00:00, 95.19it/s]
prepare data: 100%|██████████| 1281/1281 [00:01<00:00, 1186.08it/s]


In [23]:
# split a file into several subfiles

def chunks(lst, n):
    """Lazily yield consecutive slices of `lst`, each holding up to `n` items.

    The last slice is shorter when `len(lst)` is not a multiple of `n`.
    """
    yield from (lst[start:start + n] for start in range(0, len(lst), n))

def split_file(input_dir, output_dir, filename, n=4):
    """Split a text file into at most `n` consecutive chunks of whole lines.

    Chunk `idx` is written to `output_dir` under the input's name with
    `_<idx>` inserted before the extension (e.g. `data.jsonl` ->
    `data_0.jsonl`). `output_dir` is created if it does not exist.

    Args:
        input_dir: Directory containing `filename`.
        output_dir: Directory that receives the chunk files.
        filename: Name of the file to split.
        n: Upper bound on the number of chunks; values below 1 are treated as
            1 (no splitting).

    An empty input file produces no chunk files. When the file has fewer
    lines than `n`, each line becomes its own chunk.
    """
    with open(os.path.join(input_dir, filename), 'r') as fin:
        lines = fin.readlines()

    # n <= 0 used to raise ZeroDivisionError; treat it as "do not split".
    n = max(1, int(n))
    # Round the chunk length up so at most n chunks are produced, and keep it
    # at least 1 so range() never receives a zero step.
    chunk_len = max(1, (len(lines) + n - 1) // n)

    os.makedirs(output_dir, exist_ok=True)
    # splitext inserts the index before the real extension, so names that do
    # not end in '.jsonl' still get distinct chunk files.
    stem, ext = os.path.splitext(filename)
    for idx, chunk in enumerate(chunks(lines, chunk_len)):
        with open(os.path.join(output_dir, f'{stem}_{idx}{ext}'), 'w') as fout:
            fout.writelines(chunk)

In [33]:
import math

# Largest size allowed for a single chunk file, in bytes (100 MiB).
max_chunk_bytes = 100 << 20

for task in ['matching', 'generation']:
    input_dir = os.path.join(base_output_dir, task)
    output_dir = os.path.join(base_output_dir, f'{task}_chunked')
    os.makedirs(output_dir, exist_ok=True)

    for file in os.listdir(input_dir):
        file_path = os.path.join(input_dir, file)
        # Directory entries (e.g. notebook checkpoint folders) cannot be split.
        if not os.path.isfile(file_path):
            continue
        file_size = os.path.getsize(file_path)
        # Divide the exact byte size and round up: flooring to whole MiB first
        # gave 0 chunks for files under 1 MiB (which crashed split_file) and
        # one chunk too few for sizes just above a multiple of the limit.
        num_chunk = max(1, math.ceil(file_size / max_chunk_bytes))
        split_file(input_dir, output_dir, file, n=num_chunk)

In [None]:
# Submit every chunked batch-input file of both tasks as its own batch job.
for task in ('matching', 'generation'):
    file_dir = os.path.join(base_output_dir, task + '_chunked')
    for file in os.listdir(file_dir):
        create_batch_job(os.path.join(file_dir, file))

In [68]:
import os
import json

# Folder holding the raw batch output files, and the folder for parsed results.
base_input_dir = './output/openai_raw/matching'
output_dir = './output/image_caption_matching'

# Context manager so the dataset file handle is closed after parsing.
with open('../../mmsci-data/benchmark/test/image_caption_matching_data.json') as fin:
    input_data = json.load(fin)
# Position in the top-level list -> entry at that position.
input_data_mapping = dict(enumerate(input_data))

In [70]:
# Join each raw batch output line with the dataset item it was generated for and
# save one JSON list per raw file.
os.makedirs(output_dir, exist_ok=True)

for file in os.listdir(base_input_dir):
    output_list = []
    with open(os.path.join(base_input_dir, file), 'r') as fin:
        for line in fin:
            if not line.strip():
                continue
            record = json.loads(line)
            custom_id = str(record['custom_id'])

            if '_' in custom_id:
                # Ids written by form_openai_input_for_matching look like
                # "<setting>_<idx>", with setting 1-based and idx 0-based.
                setting, idx = (int(part) for part in custom_id.split('_', 1))
                source_item = input_data[setting - 1][idx]
            else:
                # Plain integer ids index the top-level list directly.
                source_item = input_data_mapping[int(custom_id)]

            # A line without a usable response has nothing to extract.
            response = record.get('response') or {}
            choices = (response.get('body') or {}).get('choices')
            if not choices:
                print(f'Skipping {custom_id} in {file}: no completion in response')
                continue
            # Only the first choice is kept, even for requests made with n > 1.
            prediction = choices[0]['message']['content']

            # Copy so the shared dataset entry is not mutated across files.
            result = dict(source_item)
            result['prediction'] = prediction
            output_list.append(result)
    with open(os.path.join(output_dir, file), 'w') as fout:
        json.dump(output_list, fout, indent=4)