In [None]:
from OpenAI_agent import OpenAIGPT
import json
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm


# Integer sentiment labels mapped to their human-readable names.
_LABEL_NAMES = {
    0: "Extremely Negative",
    1: "Negative",
    2: "Neutral",
    3: "Positive",
    4: "Extremely Positive",
}

# Instruction prepended to every row's text before it is sent for translation.
_TRANSLATION_PROMPT = "Translate the following text to English: "


def process_row(row, translator=None):
    """Translate one row's text and attach its human-readable label.

    Args:
        row: Mapping-like object providing ``row['content']`` (the text to
            translate) and ``row['label']`` (a key of ``_LABEL_NAMES``).
        translator: Optional callable that receives the full prompt string
            and returns the translation. When omitted, the module-level
            ``igpt`` object is called instead (``generate_dataset`` sets it).

    Returns:
        A dict with the keys ``"content"``, ``"translated_content"`` and
        ``"label"``, or ``None`` if anything went wrong (unknown label,
        missing key, translator failure). Failures are reported on stdout.
    """
    try:
        # Resolve the label before translating so that a row with an
        # unknown label is rejected without spending a translation request.
        label = _LABEL_NAMES[row['label']]
        original_text = row['content']

        if translator is None:
            # Fall back to the shared client; a NameError raised here (client
            # never initialised) is reported like any other row failure.
            translator = igpt

        translated_text = translator(_TRANSLATION_PROMPT + original_text)
    except Exception as e:
        # Best-effort by design: one bad row must not abort the whole batch.
        print(f"Error processing row: {e}")
        return None

    return {
        "content": original_text,
        "translated_content": translated_text,
        "label": label,
    }


def generate_dataset(df, max_samples=10):
    """Run ``process_row`` over the leading rows of ``df`` using a thread pool.

    Args:
        df: Object exposing ``head(n)`` whose result supports ``iterrows()``
            (presumably a pandas DataFrame -- confirm against the caller).
        max_samples: Number passed to ``df.head`` to limit the rows processed.

    Returns:
        A list, in row order, of the truthy values returned by
        ``process_row``; rows for which it returned ``None`` are left out.
    """
    # process_row looks the client up under this module-level name, so it
    # has to be published globally before any worker starts.
    global igpt
    igpt = OpenAIGPT(model_name='gpt-3.5-turbo', keys_path='../apikey.txt')

    rows = [record for _, record in df.head(max_samples).iterrows()]
    with ThreadPoolExecutor(max_workers=10) as pool:
        # map() schedules every row immediately and yields results in
        # submission order, so the progress bar advances row by row.
        outcomes = pool.map(process_row, rows)
        progress = tqdm(outcomes, total=len(rows), desc="Processing rows")
        return [outcome for outcome in progress if outcome]

In [None]:
# Process `data` (defined outside this cell) with a cap of 100 samples and
# save the resulting records as a JSON file.
dataset = generate_dataset(data, max_samples=100)
output_file_path = 'translated.json'

# ensure_ascii=False writes non-ASCII characters as-is rather than as \uXXXX
# escapes; the file is opened as UTF-8 to match.
with open(output_file_path, mode='w', encoding='utf-8') as out_fh:
    json.dump(
        dataset,
        out_fh,
        indent=2,
        ensure_ascii=False,
    )