In [16]:
'''
這份檔案可用於觀察openai家族的LLM直接生成distractors的現象，分為
1. 一次請他生成大量distractors
2. 數次請他生成小量distractors

(appendix B的內容, llama的跳針測試是透過chatbot arena: https://lmarena.ai/)
成果存於./dataset/gpt-3.5-turbo-1106_direct_inference_small_number_output.json
'''


import pandas as pd
import json
from tqdm import tqdm
import os
from transformers import BertTokenizer, BertForMaskedLM, pipeline
import numpy as np
import fasttext
import nltk
from nltk.tokenize import word_tokenize
from utils import *
import nltk
import spacy
from nltk.corpus import wordnet
from nltk import word_tokenize, pos_tag
from nltk.stem import WordNetLemmatizer
from postprocess import self_answer, self_answer_correctness, self_answer_same_meaning
import random
from openai import OpenAI

class openAIModel():
    """Minimal wrapper around the OpenAI chat-completions client.

    Messages are queued with `append_chat` and sent with `inference`, which
    returns the text of the first choice and then empties the queue, so every
    `inference` call starts from a clean conversation.
    """

    def __init__(self, api_key=None):
        """Create the client.

        Args:
            api_key: OpenAI API key. When None, the OpenAI SDK falls back to
                the OPENAI_API_KEY environment variable.
        """
        # SECURITY: the key must come from the caller / environment. A literal
        # key used to be embedded here; any key that has been committed to
        # source should be treated as leaked and revoked.
        self.client = OpenAI(api_key=api_key)

        # These two templates are currently unused.
        self.user_template = """A good distractor for a multiple-choice question should be carefully crafted to resemble a viable option, making the decision between the correct answer and the distractors more challenging.
Distractor's word length should be close to correct answer, and distractor's pos tag should be same with correct answer, and distractor's word frequency should be close to correct answer.
A good distractor should not be the antonym of correct answer.
The goal is to ensure that test-takers who haven't thoroughly know the meaning of correct answer are more likely to select the distractor.

While the distractor should be a plausible choice, it should not fit well in the context when compared to the correct answer.\n\n"""
        self.gpt_template = """
Got it! I'll keep those guidelines in mind. What's the question you need distractors for?
"""
        # Pending messages for the next request, as {"role", "content"} dicts.
        self.chat = list()
        # Flipped to True after the model name has been printed once.
        self.notify_using_model = False

    def inference(self, config):
        """Send the queued chat to the model and return its reply text.

        Args:
            config: dict of optional settings. "temperature" defaults to 0 and
                "model_name" defaults to "gpt-3.5-turbo-1106" when the key is
                missing or None.

        Returns:
            The message content of the first (and only requested) choice.
        """
        temperature = config.get("temperature")
        if temperature is None:
            temperature = 0

        model = config.get("model_name")
        if model is None:
            #model = "gpt-3.5-turbo-0125"
            model = "gpt-3.5-turbo-1106"

        # Tell the user once which model is being used.
        if not self.notify_using_model:
            print(f"Now inferencing with {model}")
            self.notify_using_model = True
        # For debugging: print the prompt being sent to the model.
        #self.test_prompt()

        # Call the API; n=1 asks for a single choice.
        completion = self.client.chat.completions.create(
            model=model,
            messages=self.chat,
            temperature=temperature,
            n=1,
        )
        gpt_response = completion.choices[0].message.content
        #print(f"-----[GPT]-----\n{gpt_response}")
        # The history is only cleared after a successful call.
        self.clear_chat()
        return gpt_response

    def clear_chat(self):
        """Drop every queued message."""
        self.chat = []

    def append_chat(self, prompt, role):
        """Queue one message with the given role (e.g. 'user') and content."""
        self.chat.append({
            "role": role,
            "content": prompt
        })

    def test_prompt(self):
        """Print the queued messages (debug helper)."""
        print(self.chat)

In [2]:
def direct_large_inference(question, model):
    """Request LLM to generate many distractors in one request.

    Args:
        question: mapping with a 'sentence' (containing the '[MASK]'
            placeholder) and an 'answer'.
        model: object exposing `append_chat(prompt, role)` and
            `inference(config)`.

    Returns:
        Whatever `model.inference({})` returns for the built prompt.
    """
    blanked_sentence = question['sentence'].replace('[MASK]', "_____")
    target_word = question['answer']

    # The odd indentation inside these literals is part of the prompt text and
    # is reproduced exactly.
    stem_section = (
        "**Original Sentence**\n"
        f"    {blanked_sentence}\n"
        "    \n"
        "**Target Word**\n"
        f"{target_word}\n"
        "    \n"
    )
    # Zero-shot setting: the output format is spelled out for the model.
    format_section = (
        "generate 30 distractors for stem given in Original Sentence, restrain your output in following format given below (for example, your first three generated distractors are: apple, banana, orange)\n"
        "1. apple\n"
        "2. banana\n"
        "3. orange\n"
    )

    model.append_chat(stem_section + "\n" + format_section, 'user')
    return model.inference({})

In [3]:
def extract_response(response):
    """Turn a numbered-list reply into a list of unique lowercase words.

    Single and double quotes are removed, every "<number>. " marker is
    dropped, and each remaining non-empty line becomes one entry.

    Args:
        response: raw reply text, expected to look like "1. foo\\n2. bar".

    Returns:
        Lowercased, whitespace-trimmed entries with duplicates removed, in
        order of first appearance.
    """
    # Local import: no top-level `import re` is visible in this notebook.
    import re

    # Remove ' and " from the response.
    unquoted = response.replace("'", "").replace('"', "")
    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    unnumbered = re.sub(r"\d+\. ", "", unquoted)
    # Trim every line so "album " and "album" (or a trailing "\r") are the
    # same word, and whitespace-only lines are dropped.
    candidates = (line.strip().lower() for line in unnumbered.split("\n"))
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    return list(dict.fromkeys(word for word in candidates if word))

In [4]:
def direct_small_inference(question, model, target_count=30, max_requests=None):
    """Request LLM to generate a few distractors per request, over many requests.

    Each request asks for 3 distractors and lists the distinct words received
    so far under "Words to avoid picking". Repeated words are kept in the
    result on purpose: the goal is to record what the model actually returns.

    Args:
        question: mapping with a 'sentence' (containing the '[MASK]'
            placeholder) and an 'answer'.
        model: object exposing `append_chat(prompt, role)` and
            `inference(config)`.
        target_count: stop once at least this many words were collected.
        max_requests: upper bound on the number of requests; defaults to
            `2 * target_count`. Without a bound, a model that keeps returning
            nothing parseable would make this loop (and the API usage) endless.

    Returns:
        All collected words in arrival order, duplicates included. May hold
        fewer than `target_count` words if `max_requests` was reached.
    """
    if max_requests is None:
        max_requests = 2 * target_count

    blanked_sentence = question['sentence'].replace('[MASK]', "_____")
    # The indentation inside these literals is part of the prompt text that is
    # sent to the model; it is kept exactly as it was.
    stem_section = (
        "**Original Sentence**\n"
        f"        {blanked_sentence}\n"
        "        \n"
        "    **Target Word**\n"
        f"    {question['answer']}\n"
        "        \n"
        "    "
    )
    # Zero-shot setting: the output format is spelled out for the model.
    quest_prompt = (
        "generate 3 distractors for stem given in Original Sentence, restrain your output in following format given below (for example, your first three generated distractors are: apple, banana, orange)\n"
        "    1. apple\n"
        "    2. banana\n"
        "    3. orange\n"
        "    "
    )

    generated_words = []
    requests_made = 0
    while len(generated_words) < target_count and requests_made < max_requests:
        question_prompt = stem_section
        # Distinct words so far, in first-seen order (deterministic, unlike a set).
        unique_generated_words = list(dict.fromkeys(generated_words))
        if unique_generated_words:
            question_prompt += (
                "\n**Words to avoid picking**\n "
                + ", ".join(str(word) for word in unique_generated_words)
            )
        question_prompt = question_prompt + "\n" + quest_prompt

        model.append_chat(question_prompt, 'user')
        requests_made += 1
        # Repeated distractors are intentionally not filtered out here.
        generated_words.extend(extract_response(model.inference({})))

    return generated_words

In [5]:
# Load the pre-processed question list. The sentences contain non-ASCII
# characters (e.g. typographic apostrophes), so the encoding is stated
# explicitly instead of relying on the platform default.
with open("processed_gsat_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [17]:
# Take the API key from the OPENAI_API_KEY environment variable instead of
# writing it into the notebook. A key that has been saved in a notebook should
# be treated as leaked and revoked.
model = openAIModel(os.environ.get("OPENAI_API_KEY"))
# Only the first two questions are processed here.
for question in tqdm(data[:2]):
    # Swap in the line below to request all distractors in a single call.
    #response = direct_large_inference(question, model)
    response = direct_small_inference(question, model)
    question['llm_response'] = response


  0%|          | 0/2 [00:00<?, ?it/s]

Now inferencing with gpt-3.5-turbo-1106
[{'role': 'user', 'content': '**Original Sentence**\n        Posters of the local rock band were displayed in store windows to promote the sale of their _____ tickets.\n        \n    **Target Word**\n    concert\n        \n    \ngenerate 3 distractors for stem given in Original Sentence, restrain your output in following format given below (for example, your first three generated distractors are: apple, banana, orange)\n    1. apple\n    2. banana\n    3. orange\n    '}]
-----[GPT]-----
1. album
2. merchandise
3. tour
[{'role': 'user', 'content': '**Original Sentence**\n        Posters of the local rock band were displayed in store windows to promote the sale of their _____ tickets.\n        \n    **Target Word**\n    concert\n        \n    \n**Words to avoid picking**\n merchandise, album, tour\ngenerate 3 distractors for stem given in Original Sentence, restrain your output in following format given below (for example, your first three generate

 50%|█████     | 1/2 [00:05<00:05,  5.38s/it]

-----[GPT]-----
1. movie
2. event
3. merchandise
[{'role': 'user', 'content': '**Original Sentence**\n        Maria didn’t want to deliver the bad news to David about his failing the job interview. She herself was quite _____ about it.\n        \n    **Target Word**\n    upset\n        \n    \ngenerate 3 distractors for stem given in Original Sentence, restrain your output in following format given below (for example, your first three generated distractors are: apple, banana, orange)\n    1. apple\n    2. banana\n    3. orange\n    '}]
-----[GPT]-----
1. disappointed
2. anxious
3. concerned
[{'role': 'user', 'content': '**Original Sentence**\n        Maria didn’t want to deliver the bad news to David about his failing the job interview. She herself was quite _____ about it.\n        \n    **Target Word**\n    upset\n        \n    \n**Words to avoid picking**\n anxious, concerned, disappointed\ngenerate 3 distractors for stem given in Original Sentence, restrain your output in following

100%|██████████| 2/2 [00:11<00:00,  5.84s/it]

-----[GPT]-----
1. Frustrated
2. Disappointed
3. Heartbroken





In [14]:
# Inspect the first record, including the 'llm_response' list attached by the inference loop.
print(data[0])

{'sentence': 'Posters of the local rock band were displayed in store windows to promote the sale of their [MASK] tickets.', 'answer': 'concert', 'distractors': ['journey', 'traffic', 'record'], 'llm_response': ['merchandise', 'album', 'tour', 'poster', 'cd', 't-shirt', 'event', 'movie', 'play', 'merchandise', 'event', 'movie', 'merchandise', 'event', 'movie', 'merchandise', 'event', 'movie', 'merchandise', 'event', 'movie', 'merchandise', 'event', 'movie', 'merchandise', 'event', 'movie', 'merchandise', 'event', 'movie']}


In [15]:
# Write the records (now carrying the LLM responses) to disk as indented JSON.
with open("gpt-3.5-turbo-1106_direct_inference_small_number_output.json", mode="w") as out_file:
    out_file.write(json.dumps(data, indent=2))

# 出問題的範例

In [15]:
# Hand-written prompt for the problematic example: a single request asking for
# 30 distractors. The text is sent to the model verbatim.
prompt = """**Original Sentence**
In the cross-lake swimming race, a boat will be _____ in case of an
emergency.
**Target Word**
standingby
**Words to avoid picking**
brogue, twang, lilt
generate 30 distractors for stem given in Original Sentence, restrain
your output in following format given below (for example, your
first three generated distractors are: apple, banana, orange)
1. apple
2. banana
3. orange
"""

# Queue the prompt as a user message; nothing is sent until inference() is called.
model.append_chat(prompt, 'user')

In [16]:
# Send the queued chat with the default settings (empty config); inference() clears the chat afterwards.
response = model.inference({})

[{'role': 'user', 'content': '**Original Sentence**\nIn the cross-lake swimming race, a boat will be _____ in case of an\nemergency.\n**Target Word**\nstandingby\ngenerate 30 distractors for stem given in Original Sentence, restrain\nyour output in following format given below (for example, your\nfirst three generated distractors are: apple, banana, orange)\n1. apple\n2. banana\n3. orange\n'}]
-----[GPT]-----
1. ready
2. available
3. on standby
4. prepared
5. on call
6. waiting
7. at the ready
8. on alert
9. on standby
10. on standby mode
11. on standby duty
12. on standby status
13. on standby position
14. on standby alert
15. on standby watch
16. on standby standby
17. on standby standby mode
18. on standby standby duty
19. on standby standby status
20. on standby standby position
21. on standby standby alert
22. on standby standby watch
23. on standby standby standby
24. on standby standby standby mode
25. on standby standby standby duty
26. on standby standby standby status
27. on 