In [1]:
# Run sentiment analysis through the OpenAI-compatible client (DeepSeek) and the Azure OpenAI client

import os
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
from openai import OpenAI, AzureOpenAI
from torch.utils.data import DataLoader
from tqdm import tqdm

from utils import TestDataset, save_to_csv, task_prompt

def inference(prompt, client, model, max_tokens=4, temperature=0):
    """Send one prompt as a single user message and return the reply text.

    Args:
        prompt: Text placed in the ``content`` of the user message.
        client: Object exposing ``chat.completions.create(...)``.
        model: Model identifier forwarded to the API call.
        max_tokens: Completion length cap forwarded to the API call.
        temperature: Sampling temperature forwarded to the API call.

    Returns:
        ``response.choices[0].message.content`` on success, or ``None`` when
        the request (or reading the response) raises. Failures are reported
        on stdout rather than propagated, so one bad prompt does not abort a
        whole batch.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            # Alternative kept for reference: send `task_prompt` as a system message.
            # messages=[{"role": "system", "content": task_prompt},
            #           {"role": "user", "content": prompt}],
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return response.choices[0].message.content
    except Exception as e:
        print("!!!!! Error when processing prompt !!!!!")
        print(prompt)
        # Report the cause as well; without it every failure looks the same.
        print(f"!!!!! {type(e).__name__}: {e} !!!!!")
        return None

def batch_inference(batch_data, client, model, max_workers=5):
    """Run `inference` over every prompt in `batch_data` using a thread pool.

    Args:
        batch_data: Sized collection of prompts (``len()`` is used for the
            progress-bar total).
        client: Client object forwarded to `inference`.
        model: Model identifier forwarded to `inference`.
        max_workers: Thread-pool size.

    Returns:
        A list with one entry per prompt, in input order; each entry is
        whatever `inference` returned for that prompt.
    """
    def run_one(prompt):
        return inference(prompt, client, model)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # `map` yields results in submission order; tqdm only tracks progress.
        ordered_outputs = pool.map(run_one, batch_data)
        progress = tqdm(ordered_outputs, total=len(batch_data))
        return list(progress)

In [4]:
# use appropriate dataset for inference
# data_path = "data/train_easy.csv"
data_path = "data/train_hard.csv"
# data_path = "data/FPB.csv"

# Thread-pool size used for each provider's batch of requests.
MAX_WORKERS = 16

print("----- Load Dataset -----")
test_dataset = TestDataset(data_path, use_template=True)
batch_data = [test_dataset[i]['sentence'] for i in range(len(test_dataset))]

# Credentials come from the environment so they never get saved in the
# notebook; a missing variable raises KeyError before any request is sent.
print("----- Inference Deepseek -----")
deepseek = OpenAI(api_key=os.environ["DEEPSEEK_API_KEY"], base_url="https://api.deepseek.com")
deepseek_outputs = batch_inference(batch_data, deepseek, "deepseek-chat", MAX_WORKERS)
save_to_csv(data_path, "deepseek", deepseek_outputs)

print("----- Inference Azure GPT-4O-Mini -----")
gpt = AzureOpenAI(
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version="2023-06-01-preview",
    azure_endpoint="https://openai-oe.openai.azure.com/",
)
gpt_outputs = batch_inference(batch_data, gpt, "gpt-4o-mini", MAX_WORKERS)
save_to_csv(data_path, "openai", gpt_outputs)

----- Load Dataset -----
----- Inference Deepseek -----


100%|██████████| 100/100 [00:10<00:00,  9.24it/s]


----- Inference Azure GPT-4O-Mini -----


100%|██████████| 100/100 [00:05<00:00, 17.59it/s]
