In [21]:
import pandas as pd
import pandas as pd
import json
import random
from retrying import retry
import openai
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Point the OpenAI SDK at this base URL; it is a module-level setting, so it
# applies to every request made through the `openai` module in this kernel.
openai.api_base = "https://one.aiskt.com/v1"


class OpenAIGPT:
    """Small callable wrapper around the legacy ``openai.ChatCompletion`` API.

    API keys are read once from a text file (one key per line). Each call
    sends a single-turn user message and returns the text of the first choice.

    The instance is called from several threads at once elsewhere in this
    notebook, so the key is passed per request instead of being written to
    the process-global ``openai.api_key``.
    """

    def __init__(self, model_name="gpt-3.5-turbo", keys_path=None):
        """Load the API keys.

        Args:
            model_name: Chat model identifier sent with every request.
            keys_path: Path to a UTF-8 text file with one API key per line.
                Lines shorter than 4 characters after stripping are ignored.

        Raises:
            ValueError: If the file contains no usable key. Failing here
                avoids discovering the problem only after every request has
                exhausted its retries.
        """
        self.model_name = model_name
        with open(keys_path, encoding="utf-8", mode="r") as fr:
            self.keys = [line.strip() for line in fr if len(line.strip()) >= 4]
        if not self.keys:
            raise ValueError(f"No usable API key found in {keys_path!r}")

    def __post_process(self, response):
        """Return the message text of the first choice in ``response``."""
        return response["choices"][0]["message"]["content"]

    # Any exception raised below is retried: fixed 300 ms wait between
    # attempts, at most 50 attempts, after which the last error propagates.
    @retry(wait_fixed=300, stop_max_attempt_number=50)
    def __call__(self, message):
        """Send ``message`` as a single user turn and return the reply text.

        Args:
            message: Prompt text.

        Returns:
            The reply string, or the tuple ``(False, "Your input is empty.")``
            when ``message`` is ``None`` or ``""`` (no request is made).
        """
        if message is None or message == "":
            return False, "Your input is empty."

        current_key = self.keys[0] if len(self.keys) == 1 else random.choice(self.keys)
        response = openai.ChatCompletion.create(
            # Per-request key: concurrent callers no longer overwrite each
            # other's key through shared module state.
            api_key=current_key,
            model=self.model_name,
            messages=[{"role": "user", "content": message}],
            temperature=0.3,
            top_p=0.1,
            frequency_penalty=0.6,
            presence_penalty=0.6,
            n=1,
        )
        return self.__post_process(response)


# Module-level client with the default model; API keys are read from
# ../apikey.txt. process_row looks this name up as a global.
igpt = OpenAIGPT(keys_path="../apikey.txt")

In [22]:
# Quick manual check (presumably a smoke test): send a short greeting and
# display the model's reply as the cell output.
igpt('你好')

'你好！有什么可以帮助你的吗？'

### Generating GPT's answers


In [23]:
def process_row(row):
    """Query the global ``igpt`` client for one row and pair the reply with its label.

    Args:
        row: Mapping read via ``row['prompt']`` (text sent to the model) and
            ``row['expected']`` (reference label).

    Returns:
        ``{"label": ..., "predict": ...}`` where ``predict`` is the reply
        string, or ``None`` when the row could not be processed. A row fails
        when a key is missing, the client raises, or the client returns
        something other than a string (for example its empty-input sentinel
        tuple). Recording such a value would break the downstream
        ``label in predict`` check and the saved JSONL.
    """
    try:
        instruction = row['prompt']
        label = row['expected']

        res = igpt(f'{instruction}')
    except Exception as e:
        # Best effort: report the failure and let the caller skip this row.
        print(f"Error processing row: {e}")
        return None

    if not isinstance(res, str):
        print(f"Error processing row: unexpected client reply {res!r}")
        return None

    return {
        "label": label,
        "predict": res
    }


def generate_dataset(data, max_samples=100, max_workers=10,
                     model_name='gpt-3.5-turbo', keys_path='../apikey.txt'):
    """Run ``process_row`` over the first ``max_samples`` rows using a thread pool.

    Side effect: rebinds the module-level ``igpt`` client (the one
    ``process_row`` uses) to a fresh ``OpenAIGPT`` built from ``model_name``
    and ``keys_path``.

    Args:
        data: Sliceable sequence of rows accepted by ``process_row``.
        max_samples: Upper bound on the number of rows taken from the front
            of ``data``.
        max_workers: Number of worker threads issuing requests concurrently.
        model_name: Model identifier passed to ``OpenAIGPT``.
        keys_path: API key file passed to ``OpenAIGPT``.

    Returns:
        List of the truthy results returned by ``process_row``, in the same
        order as the input rows; rows for which it returned ``None`` are
        omitted.
    """
    global igpt
    igpt = OpenAIGPT(model_name=model_name, keys_path=keys_path)

    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_row, row) for row in data[:max_samples]]
        # Iterate in submission order (not completion order) so the output
        # order follows the input order.
        for future in tqdm(futures, total=len(futures), desc="Processing rows"):
            result = future.result()
            if result:
                results.append(result)

    return results

In [24]:
# Load the evaluation rows, shuffle them, and query the model for a sample.
# The rows are later read via row['prompt'] and row['expected'] in process_row.
SHUFFLE_SEED = 42  # fixed seed so the evaluated sample is the same on every run

test_data = 'test_dataset/results.json'
# Explicit encoding: do not depend on the platform's default text encoding.
with open(test_data, 'r', encoding='utf-8') as file:
    data = json.load(file)
# A dedicated RNG keeps the shuffle reproducible without reseeding the global
# `random` state used elsewhere (e.g. for API key selection).
random.Random(SHUFFLE_SEED).shuffle(data)

dataset = generate_dataset(data, max_samples=500)

Processing rows: 100%|██████████| 500/500 [04:04<00:00,  2.05it/s]


In [25]:
# Save the predictions as JSON Lines: one JSON object per line, with
# non-ASCII characters written as-is rather than \u-escaped.
output_file_path = 'test_dataset/evaluation_results_gpt.jsonl'
with open(output_file_path, mode='w', encoding='utf-8') as f:
    for item in dataset:
        serialized = json.dumps(item, ensure_ascii=False)
        f.write(serialized + '\n')

In [26]:
# A row counts as correct when its label occurs inside the prediction
# (`in` test; a substring match when the prediction is a string).
cnt = sum(1 for line in dataset if line['label'] in line['predict'])

# Guard the division: `dataset` is empty when every row failed upstream.
if dataset:
    print(f"Accuracy: {cnt/len(dataset)}")
else:
    print("Accuracy: n/a (dataset is empty)")

Accuracy: 0.35
