# 用 ChatGPT 產生 MCQ 題目的 Distractor

## Import 

In [1]:
!pip install openai

Defaulting to user installation because normal site-packages is not writeable


In [1]:
import json

## Read MCQ File

In [2]:
def read_data(item):
    """Load one split of the cleaned MCQ dataset from disk.

    Args:
        item: Split name substituted into the file name, e.g. ``'train'`` or
            ``'test'`` (reads ``../data/mcq/total_new_cleaned_<item>.json``).

    Returns:
        The parsed JSON content of the file.

    Raises:
        FileNotFoundError: If the file for the requested split does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    path = '../data/mcq/total_new_cleaned_{}.json'.format(item)
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open(path, encoding='utf-8') as f:
        return json.load(f)

In [3]:
# Load both dataset splits (train first, then test).
train, test = read_data('train'), read_data('test')

In [5]:
# Display the first training record to inspect its fields.
train[0]

{'answer': 'gravity',
 'distractors': ['friction', 'erosion', 'magnetism'],
 'sentence': '**blank** causes rocks to roll downhill'}

In [6]:
# Use the first training record as the running example for the prompt drafts.
sentence, answer = train[0]['sentence'], train[0]['answer']

## Prompt 設定
Prompt 1 = "I will give you a cloze test sentence and answer, please generate three token level distractors that can fit in the blank."

In [7]:
# Prompt 1: the task instruction, then the example sentence/answer on a new line.
prompt1 = (
    'I will give you a cloze test sentence and answer, please generate three token level distractors that can fit in the blank.'
    + "\nsentence = {}. answer = {}.".format(sentence, answer)
)

In [8]:
# Show the fully assembled prompt 1 (instruction + example).
print(prompt1)

I will give you a cloze test sentence and answer, please generate three token level distractors that can fit in the blank.
sentence = **blank** causes rocks to roll downhill. answer = gravity.


Prompt 2 = "Given a sentence and its corresponding answer, please generate 10 plausible but incorrect options based on the given question, with each option differing from the correct answer. Additionally, the length of each false option should not exceed one word."


給定一段句子和對應句子的答案，請根據給定的題目生成10筆似是而非的錯誤選項並且單字長度不能超過1。

In [10]:
# Prompt 2: asks for 10 single-word distractors, then the example sentence/answer on a new line.
prompt2 = (
    'Given a sentence and its corresponding answer, please generate 10 plausible but incorrect options based on the given question, with each option differing from the correct answer. Additionally, the length of each false option should not exceed one word.'
    + "\nsentence = {}. answer = {}.".format(sentence, answer)
)

In [11]:
# Show the fully assembled prompt 2 (instruction + example).
print(prompt2)

Given a sentence and its corresponding answer, please generate 10 plausible but incorrect options based on the given question, with each option differing from the correct answer. Additionally, the length of each false option should not exceed one word.
sentence = **blank** causes rocks to roll downhill. answer = gravity.


### 基本範例 使用 text-davinci-003

In [14]:
import os

import openai

# Take the API key from the environment so no credential is ever stored in
# the notebook; a missing variable fails fast with a KeyError.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Single completion request for prompt 2 using the legacy (pre-1.0 SDK) API.
# NOTE(review): text-davinci-003 appears to have been retired by OpenAI —
# confirm availability or switch to a currently supported model.
response = openai.Completion.create(
    model="text-davinci-003",
    prompt=prompt2,
    max_tokens=128,
    temperature=0.5,
)

# The generated text of the first (and only requested) choice.
completed_text = response["choices"][0]["text"]
print(completed_text)



1. Lift
2. Force
3. Tilt
4. Weight
5. Thrust
6. Pressure
7. Incline
8. Inertia
9. Motion
10. Traction


#### 每次都會生成不同的選項

In [38]:
# Send the same prompt again: with temperature 0.5 the sampled output varies
# between calls, as the differing captured outputs show.
response = openai.Completion.create(
    prompt=prompt2,
    model="text-davinci-003",
    temperature=0.5,
    max_tokens=128,
)

# Keep only the generated text of the first choice and show it.
completed_text = response["choices"][0]["text"]
print(completed_text)



1. force
2. momentum
3. attraction


In [15]:
# Preview the user-message text that would be built for the first five training items.
for i, item in enumerate(train[:5]):
    sentence, answer = item['sentence'], item['answer']
    prompt = "sentence = {}. answer = {}.".format(sentence, answer)
    print(prompt, '\n')

sentence = **blank** causes rocks to roll downhill. answer = gravity. 

sentence = **blank** of measurement describes an object 's length. answer = meter. 

sentence = **blank** is used to determine an object 's texture. answer = touch. 

sentence = **blank** is found in food. answer = chemical. 

sentence = A student crumples up a sheet of paper.  **blank**  property of the paper has changed. answer = shape. 



### Generate Distractors Candidate Sets by ChatGPT API

In [None]:
# Bookkeeping note (a bare string, not used by any code): index ranges of
# `train` — presumably the chunks processed in separate runs of the
# generation loop below; confirm with the author.
"""
train
0~87
88~1211
1212~1717
"""

In [1]:
import openai
import json
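# SECURITY: an API key was previously pasted on this line and has been redacted — revoke that key.
# Provide the key through the OPENAI_API_KEY environment variable instead of writing it in the notebook.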

In [2]:
def read_data(item):
    """Read the cleaned MCQ JSON file for a given split.

    Args:
        item: Split name inserted into the file name, e.g. ``'train'`` or
            ``'test'`` (reads ``../data/mcq/total_new_cleaned_<item>.json``).

    Returns:
        The parsed JSON content of the file.

    Raises:
        FileNotFoundError: If no file exists for the requested split.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    path = '../data/mcq/total_new_cleaned_{}.json'.format(item)
    # Explicit UTF-8 keeps decoding independent of the platform locale.
    with open(path, encoding='utf-8') as f:
        return json.load(f)

In [3]:
# Load both dataset splits (train first, then test).
train, test = read_data('train'), read_data('test')

In [4]:
# Number of records in each split.
len(train), len(test)

(2321, 259)

In [5]:
import time

# The system instruction is identical for every item, so build it once.
SYSTEM_PROMPT = "Given a sentence and its corresponding answer, please generate 10 plausible but incorrect options based on the given question, with each option differing from the correct answer. Additionally, the length of each false option should not exceed one word."

# Transient failures that are worth retrying. Authentication and
# invalid-request errors are deliberately excluded so they surface immediately.
RETRYABLE_ERRORS = (
    openai.error.RateLimitError,
    openai.error.APIError,
    openai.error.APIConnectionError,
    openai.error.ServiceUnavailableError,
    openai.error.Timeout,
)
MAX_ATTEMPTS = 5

# candidates_set: one dict per item with the original fields plus the raw model reply.
# response_set: the full API response object for each item, in the same order.
candidates_set = []
response_set = []
for i, item in enumerate(test):
    sentence = item['sentence']
    answer = item['answer']
    distractors = item['distractors']
    prompt = "sentence = {}. answer = {}.".format(sentence, answer)
    # The constant is a manual start offset so the printed index stays
    # meaningful when only a slice of the data is processed.
    print(0 + i)
    print(item)

    # Retry transient API errors with exponential backoff instead of aborting
    # the whole run; the last failure is re-raised after MAX_ATTEMPTS tries.
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": prompt},
                ],
            )
            break
        except RETRYABLE_ERRORS as err:
            if attempt == MAX_ATTEMPTS:
                raise
            delay = 2 ** attempt
            print('retrying in {}s after {}: {}'.format(delay, type(err).__name__, err))
            time.sleep(delay)

    # Raw text of the first choice (a numbered list, judging by the captured output).
    response_context = response['choices'][0]['message']['content']
    print(response_context)
    print('*'*20)
    candidates_set.append({'sentence':sentence, 'distractors':distractors, 'answer':answer, 'candidates':response_context})
    response_set.append(response)

0
{'answer': 'pheromone', 'distractors': ['enzyme', 'isolate', 'amino'], 'sentence': '**blank** is used to describe a chemical released by an animal that affects the behavior or physiology of animals of the same species'}
1. hormone
2. insulin
3. chemical
4. phlegm
5. empathy
6. protein
7. epinephrine
8. podagra
9. aspirin
10. enzyme
********************
1
{'answer': 'autotrophic', 'distractors': ['endoscopic', 'symbiotic', 'mutualistic'], 'sentence': 'Producers at the base of ecological food webs are also known as **blank**'}
1. Allophagic
2. Heterotrophic
3. Omnisapic
4. Granivorous
5. Frugivorous
6. Mycophagous
7. Coprophagous
8. Carnivorous
9. Herbivorous
10. Necrophagous
********************
2
{'answer': 'ozone', 'distractors': ['dioxide', 'carbon', 'sulphur'], 'sentence': 'the term for the gas in smog that can damage plants is **blank**'}
1. Nitrogen
2. Carbon
3. Oxygen
4. Sulphur
5. Lithium
6. Chlorine
7. Helium
8. Fluorine
9. Neon
10. Argon
********************
3
{'answer': 'lu

In [6]:
# Sanity check: one candidate entry and one raw response were stored per processed item.
len(candidates_set), len(response_set)

(259, 259)

In [7]:
# Show only the first few entries; displaying the whole list floods the notebook output.
candidates_set[:3]

[{'sentence': '**blank** is used to describe a chemical released by an animal that affects the behavior or physiology of animals of the same species',
  'distractors': ['enzyme', 'isolate', 'amino'],
  'answer': 'pheromone',
  'candidates': '1. hormone\n2. insulin\n3. chemical\n4. phlegm\n5. empathy\n6. protein\n7. epinephrine\n8. podagra\n9. aspirin\n10. enzyme'},
 {'sentence': 'Producers at the base of ecological food webs are also known as **blank**',
  'distractors': ['endoscopic', 'symbiotic', 'mutualistic'],
  'answer': 'autotrophic',
  'candidates': '1. Allophagic\n2. Heterotrophic\n3. Omnisapic\n4. Granivorous\n5. Frugivorous\n6. Mycophagous\n7. Coprophagous\n8. Carnivorous\n9. Herbivorous\n10. Necrophagous'},
 {'sentence': 'the term for the gas in smog that can damage plants is **blank**',
  'distractors': ['dioxide', 'carbon', 'sulphur'],
  'answer': 'ozone',
  'candidates': '1. Nitrogen\n2. Carbon\n3. Oxygen\n4. Sulphur\n5. Lithium\n6. Chlorine\n7. Helium\n8. Fluorine\n9. Ne

In [7]:
# Scratch arithmetic: 1718 + 603 - 1 = 2320, i.e. len(train) - 1 —
# presumably the last index of a final chunk starting at 1718; confirm.
1718+603-1

2320

In [8]:
# Scratch arithmetic: 1212 + 506 - 1 = 1717, matching the end of the
# "1212~1717" range in the chunk note earlier in this section.
1212+506-1

1717

In [None]:
# Write the collected candidate entries to disk as JSON.
# NOTE(review): candidates_set is built from `test` in the generation loop of
# this notebook, yet the file name says "train" — confirm which split is intended.
with open('chatgpt.train.candidate.json', 'w') as out_file:
    json.dump(candidates_set, fp=out_file)

In [9]:
# Write the raw API responses to disk as JSON.
# NOTE(review): response_set is built from `test` in the generation loop of
# this notebook, yet the file name says "train" — confirm which split is intended.
with open('chatgpt_train_augmentation_set(response).json', 'w') as out_file:
    json.dump(response_set, fp=out_file)

In [None]:
# Alternative prompt drafts kept as a bare string; no code visible in this
# notebook references them.
"""
You are a helpful distractors generation assistant. The goal is to create plausible false answers based on given cloze test sentences and answers.
I will give you a cloze test sentence and answer, please generate top ten token level distractors that can fit in the blank. The goal is to create plausible false answers.
"""