# ChatGPT API Tests

### Imports

In [1]:
import base64
import json
import mimetypes
import os
import re

import pandas as pd
import requests
import tqdm

### Functions

#### Image Encoding

In [2]:
def encode_image(image_path):
    """Read the file at ``image_path`` in binary mode and return it as base64 text.

    The base64 bytes are decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

#### Payload

In [3]:
def get_payload(image_path, prompt_text, detail="auto", model="gpt-4-turbo", max_tokens=300):
    """Build the chat-completions request body for one image plus one text prompt.

    Parameters
    ----------
    image_path : path of the image file; it is base64-encoded with ``encode_image``.
    prompt_text : instruction text sent in the same user message as the image.
    detail : value of the image's ``detail`` field (default ``"auto"``).
    model : model name placed in the payload (default ``"gpt-4-turbo"``).
    max_tokens : completion token cap placed in the payload (default ``300``).

    Returns
    -------
    dict suitable for ``requests.post(..., json=payload)``.
    """
    base64_image = encode_image(image_path)

    # Label the data URL with the file's real image type (the notebook sends
    # .png cards); fall back to the previous hard-coded jpeg label when the
    # extension is unknown or is not an image type.
    mime_type, _ = mimetypes.guess_type(str(image_path))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    text_part = {"type": "text", "text": prompt_text}
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": f"data:{mime_type};base64,{base64_image}",
            "detail": detail,
        },
    }

    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": [text_part, image_part]},
        ],
        "max_tokens": max_tokens,
    }

    return payload

### API Key and Request Headers

In [4]:
# Read the OpenAI API key from the OPENAI_API_KEY environment variable so the
# secret is never saved in the notebook. Falls back to the previous empty
# string when the variable is unset.
api_key = os.environ.get("OPENAI_API_KEY", "")

In [5]:
# Headers sent with every API request: a JSON body and bearer-token
# authorisation built from api_key.
headers = dict()
headers["Content-Type"] = "application/json"
headers["Authorization"] = f"Bearer {api_key}"

### Singular Test

In [4]:
# Image used for the one-off request in this section (absolute local path).
image_path = "/home/arias1/Documents/GitHub/egg_cards/paths_to_test/001-0042.png"

In [5]:
# Base64 text of the test image, produced by encode_image.
base64_image = encode_image(image_path)

In [8]:
# Build the single-test request with the shared get_payload helper instead of
# a hand-copied dict, so this test and the looped tests cannot drift apart.
# The helper sends detail="auto" explicitly.
single_test_prompt = "Extract data from this image. Fields to include are: Registration Number, Locality, Collector, Date, Set Mark and Number of eggs. The values are written inside the field boxes. Structure the response as JSON"
payload = get_payload(image_path, single_test_prompt)

In [9]:
# POST the payload to the chat-completions endpoint. The timeout (seconds)
# stops the cell from hanging forever if the server never answers.
response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers=headers,
    json=payload,
    timeout=120,
)

In [10]:
# Show the whole decoded JSON reply (choices, usage, model, ...).
print(response.json())

{'id': 'chatcmpl-9CoQL14fNQAa933M0ge42Dx17BCMS', 'object': 'chat.completion', 'created': 1712840793, 'model': 'gpt-4-turbo-2024-04-09', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': '```json\n{\n  "Registration Number": "B.M.-NAT. HIST. REG. NO. 1932-11-1.7",\n  "Locality": "Matau",\n  "Collector": "H. St. J. Philby",\n  "Date": "3 August 1936",\n  "Set Mark": "1-42",\n  "Number of Eggs": "Fragments"\n}\n```'}, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 1154, 'completion_tokens': 90, 'total_tokens': 1244}, 'system_fingerprint': 'fp_d1bac968b4'}


### Looped Tests

#### API Call

In [6]:
# Prompt used for every request in the looped test: asks for nine named fields
# returned as JSON and says where Species and Additional Text are on the card.
prompt="Extract the data from this image. Fields to include are: Filename, Species, Registration Number, Locality, Collector, Date, Set Mark, Number of Eggs, Additional Text. The values are written inside the field boxes. Structure the response as JSON. The Species is written in the top horizontal box. The Additional Text is in the lowest horizontal box. For the Additional Text field, include all text written in the Additional Text box."

In [7]:
full_path = '/home/arias1/Documents/GitHub/egg_cards/chatgpt_tests'

# Send the first 20 directory entries to the API one at a time and keep the
# raw text of each reply, in listing order.
all_new_responses = []
for image_path in tqdm.tqdm(os.listdir(full_path)[:20]):
    payload = get_payload(os.path.join(full_path, image_path), prompt)
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=120,  # seconds; avoids hanging the loop on a stalled request
    )
    # Surface API failures (bad key, rate limit, ...) as an HTTP error with
    # the status code instead of an opaque KeyError on 'choices' below.
    response.raise_for_status()
    text = response.json()['choices'][0]['message']['content']
    all_new_responses.append(text)

100%|███████████████████████████████████████████████████████████████████████████████████| 20/20 [04:11<00:00, 12.58s/it]


#### CSV Outputs

In [8]:
# Decode the JSON object embedded in each reply. Slicing from the first '{' to
# the last '}' handles fenced replies (```json ... ```), replies with leading
# prose, and bare JSON. The earlier newline-anchored regex raised
# AttributeError on None whenever the framing differed.
outputs_formatted = []

for r in all_new_responses:
    start = r.find('{')
    end = r.rfind('}')
    if start == -1 or end < start:
        raise ValueError(f"No JSON object found in response: {r[:80]!r}")

    result = json.loads(r[start:end + 1])

    outputs_formatted.append(result)

In [9]:
# One row per parsed reply; columns come from the JSON keys in the replies.
outputs_df = pd.DataFrame(outputs_formatted)

In [None]:
# Put the real file names in a 'Filename' column. Only as many directory
# entries as there are rows are used, because the API loop processed a slice
# of the listing. The column is overwritten if the model already returned a
# 'Filename' key, since DataFrame.insert raises on an existing column.
filenames = os.listdir(full_path)[:len(outputs_df)]
if 'Filename' in outputs_df.columns:
    outputs_df['Filename'] = filenames
else:
    outputs_df.insert(0, 'Filename', filenames)

In [114]:
# Overwrite 'Filename' with the real file names. The listing is cut to the
# number of rows because the API loop processed only a slice of the directory;
# assigning the full listing fails when the directory holds more files.
outputs_df['Filename'] = os.listdir(full_path)[:len(outputs_df)]

In [10]:
# Write the table to CSV without the index column.
outputs_df.to_csv('chatgpt_tests3.csv',index=False)

### Low vs Auto Detail Tests

In [None]:
path_to_images = '/home/arias1/Documents/GitHub/egg_cards/paths_to_test'

# get_payload requires a prompt; the earlier call omitted it and raised TypeError.
# NOTE(review): the recorded low/auto tables carry the six fields of the
# single-test prompt, so that wording is reused here — confirm it is the
# prompt intended for this comparison.
detail_test_prompt = "Extract data from this image. Fields to include are: Registration Number, Locality, Collector, Date, Set Mark and Number of eggs. The values are written inside the field boxes. Structure the response as JSON"

# Request every image in the directory with detail="low" and keep the raw reply text.
all_responses_low = []
for image_path in os.listdir(path_to_images):
    payload = get_payload(os.path.join(path_to_images, image_path), detail_test_prompt, detail="low")
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=120,  # seconds; avoids hanging the loop on a stalled request
    )
    # Surface API failures as an HTTP error rather than a KeyError on 'choices'.
    response.raise_for_status()
    text = response.json()['choices'][0]['message']['content']
    all_responses_low.append(text)

In [66]:
def _parse_json_reply(reply):
    """Return the JSON object embedded in a model reply.

    Decodes the text between the first '{' and the last '}', so a fenced reply
    and a reply with introductory prose before the fence are handled the same
    way. Raises ValueError when no braces are present; json.loads raises its
    own ValueError subclass on malformed JSON.
    """
    start = reply.find('{')
    end = reply.rfind('}')
    if start == -1 or end < start:
        raise ValueError(f"No JSON object found in response: {reply[:80]!r}")
    return json.loads(reply[start:end + 1])


# NOTE(review): `all_responses` (the detail="auto" replies) is not defined in
# any cell visible in this notebook — it must be created before this cell runs.
# Each list is parsed independently; the fixed slice offsets and bare `except`
# used previously are replaced by brace-based extraction.
outputs_auto = [_parse_json_reply(r) for r in all_responses]
outputs_low = [_parse_json_reply(r) for r in all_responses_low]

In [131]:
# Table of the parsed replies held in outputs_low.
pd.DataFrame(outputs_low)

Unnamed: 0,Registration Number,Locality,Collector,Date,Set Mark,Number of Eggs
0,1-42,Matru,H. St. J. Philby,3 August 1936,Fragments found,Fragments
1,1-123,,Buckley Collection,,276 ♂,1
2,1911.8.4.2,"Rio Grande do Sul, Brazil",J. Fothering,-,-,2
3,1-96,Somaliland,Old Collection,,,1


In [70]:
# Table of the parsed replies held in outputs_auto.
pd.DataFrame(outputs_auto)

Unnamed: 0,Registration Number,Locality,Collector,Date,Set Mark,Number of Eggs,Number of eggs
0,1932.11.1.7,Matau,H. St. J. Philby,3 August 1936,1-42,Fragments,
1,1-123,,Buckley Collection,,276 ♂,,1.0
2,1842.9.6.2,"Rio Grande Do Sul South, Brazil",Dr von Ihering,,,2,
3,1-96,Somaliland,Old Collection,,,1,
