In [3]:
import numpy as np
import itertools
import warnings
import logging
from datasets import Dataset
from transformers.pipelines.pt_utils import KeyDataset
from tqdm import tqdm
import os
from pattern.en import pluralize
import pickle5 as pickle

import csv
import time
from time import sleep
from pathlib import Path
from joblib import Parallel, delayed

import torch
import openai
import transformers
from accelerate import Accelerator

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
def timer(start_time):
    """the helper function for running time"""
    minutes, seconds = divmod(time.time() - start_time, 60)
    return f"{int(minutes)} mins {int(seconds)} sec"

def send_gpt_prompt(batch, model_type, prompt_and_response, temperature, max_tokens):
    """helper function to send a whole to chatgpt"""
    for prompt in batch:
        succeed = False
        completion = None
        while not succeed:
            try:
                completion = openai.Completion.create(
                    engine = model_type,
                    prompt = prompt,
                    max_tokens = max_tokens,
                    n = 1,
                    # temperature = temperature,
                )
                succeed = True
            except Exception as e:
                print("GPT sleeping...")
                sleep(60)
        assert completion is not None
        response = completion['choices'][0]['text'].replace('\n', ' ').replace(' .', '.').strip()
        prompt_and_response.append([prompt, response, response.split(" ")[-1][:-1]])
        
def generate_responses_gpt(batches, model_type, output_path, temperature, max_tokens):
    start_time = time.time()
    
    openai.api_key = Path(f"../AZURE_OPENAI_KEY").read_text()
    openai.api_base = Path(f"../AZURE_OPENAI_ENDPOINT").read_text()
    openai.api_type = 'azure'
    openai.api_version = '2023-05-15'
        
    prompt_and_response = []
    
    # can change n_jobs accoding to the size of dataset
    Parallel(n_jobs = 10, require='sharedmem')(delayed(send_gpt_prompt)(batch, model_type, prompt_and_response, temperature, max_tokens) for batch in batches)
    Parallel(n_jobs = 10, require='sharedmem')(
        delayed(send_gpt_prompt)(
            batch, model_type, prompt_and_response, temperature, max_tokens
        ) for batch in batches
    )
    
    print(f'Time taken to generate responses is {timer(start_time)}s')
    
    # write_responses(prompt_and_response, output_path, 'w')
    
    return prompt_and_response

In [19]:
batches = [[
    "Answer using only one word - Alligator or Spanner and not Shovel. Which is more similar in meaning to Shovel? End your answer with, 'The answer is '. Let's think step by step.",
    "Answer using only one word - Gecko or Salamander and not Alligator. Which is more similar in meaning to Alligator? End your answer with, 'The answer is '. Let's think step by step."
]]

generate_responses_gpt(batches, "gpt-semantics", './test.csv', 0.7, 256)

Time taken to generate responses is 0 mins 2 secs


[["Answer using only one word - Alligator or Spanner and not Shovel. Which is more similar in meaning to Shovel? End your answer with, 'The answer is '. Let's think step by step.",
  'The answer is alligator.',
  'alligator'],
 ["Answer using only one word - Gecko or Salamander and not Alligator. Which is more similar in meaning to Alligator? End your answer with, 'The answer is '. Let's think step by step.",
  'Gecko  The answer is gecko.',
  'gecko'],
 ["Answer using only one word - Alligator or Spanner and not Shovel. Which is more similar in meaning to Shovel? End your answer with, 'The answer is '. Let's think step by step.",
  'The answer is Alligator.',
  'Alligator'],
 ["Answer using only one word - Gecko or Salamander and not Alligator. Which is more similar in meaning to Alligator? End your answer with, 'The answer is '. Let's think step by step.",
  'Alligator  The first thing that comes to mind when thinking about the word alligator is that it is a large reptile. This immed

In [20]:
generate_responses_gpt(batches, "text-davinci-003", './test.csv', 0.7, 256)

Time taken to generate responses is 0 mins 4 secs


[["Answer using only one word - Alligator or Spanner and not Shovel. Which is more similar in meaning to Shovel? End your answer with, 'The answer is '. Let's think step by step.",
  'A shovel is a tool that is typically used for moving dirt or for digging. A spanner is also a type of tool, often used for mechanical construction. The answer is Spanner.',
  'Spanner'],
 ["Answer using only one word - Gecko or Salamander and not Alligator. Which is more similar in meaning to Alligator? End your answer with, 'The answer is '. Let's think step by step.",
  "The word 'alligator' is usually associated with a reptile, so the other two words must also be reptiles. 'Salamander' is a type of amphibian and 'gecko' is a type of lizard. Therefore, the answer is Gecko. The answer is Gecko.",
  'Gecko'],
 ["Answer using only one word - Alligator or Spanner and not Shovel. Which is more similar in meaning to Shovel? End your answer with, 'The answer is '. Let's think step by step.",
  'The answer is S