In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os
import json
from numpy.polynomial.polynomial import polyfit
from scipy.stats import pearsonr



In [2]:
# LaTeX export: draw every figure with the PGF backend
matplotlib.use("pgf")

# rcParams shared by all figures produced in this notebook
_pgf_rc_settings = {
    "pgf.texsystem": "pdflatex",
    "font.family": "serif",
    "text.usetex": True,
    "pgf.rcfonts": False,
    "figure.autolayout": True,
    "font.size": 10,
    "axes.titlesize": 10,
    "figure.max_open_warning": 0,
}
matplotlib.rcParams.update(_pgf_rc_settings)

In [3]:
# Load case study results
df = pd.read_csv("raw_results.csv")

# Everything in the working directory path that precedes the
# 'user_centric_experiment' folder is treated as the project root.
project_root = os.path.abspath(os.getcwd()).split('user_centric_experiment')[0]

# Load original and generated quizzes.
# os.path.join is used instead of '+' so the separator is inserted when the
# root does not already end with one (e.g. the marker folder is not part of
# the working directory path).
original_quiz_path = os.path.join(project_root, 'processed_data/gpt3/completion_6/processed_test.json')
generated_quiz_path = os.path.join(project_root, 'generated_data_gpt3/experiment_6/generated_quiz.json')

In [4]:
# Get prompts and original quizzes.
# The file holds one JSON object per line; it is read once inside a `with`
# block so the handle is closed and each line is parsed a single time.
prompts = []
original_quizzes = []

with open(original_quiz_path) as f:
    for line in f:
        record = json.loads(line)
        # Keep only the text before the '###' separator
        prompts.append(record['prompt'].split('\n\n###')[0].strip())
        original_quizzes.append(record['completion'].split('\n###')[0].strip())

# Get generated quizzes (a single JSON object; its values are the quiz texts)
with open(generated_quiz_path) as f:
    generated_quizzes_dict = json.load(f)

generated_quizzes = [quiz.strip() for quiz in generated_quizzes_dict.values()]

In [5]:
# IDs of the test items that were shown to the participants
sampled_idx = [
    0, 54, 63, 126, 135,
    144, 153, 171, 279, 351,
    414, 450, 531, 639, 648,
    756, 774, 819, 828, 882,
]

In [6]:
# Separate the question texts (first data row) from the participant rows
labels = df.columns.tolist()
questions = df.iloc[0].tolist()

# The last 15 columns are handled as the opinion questions
opinion_questions = questions[-15:]
opinion_questions_labels = labels[-15:]

# column label -> question text
opinion_questions_dict = {
    label: question
    for label, question in zip(opinion_questions_labels, opinion_questions)
}

# Drop the first row so that only the remaining rows are analysed below
df = df.iloc[1:]

In [7]:
# Answer key of the original quizzes, ordered ORQ1 .. ORQ20
orq_answers = [
    "Because her family was very poor.",
    "He and his friends were helping his neighbor moving a couch.",
    "Byron Bay and Sydney",
    "somewhat interact with the students",
    "A language learning environment.",
    "150",
    "The Australian government tries to make immigrants feel at home.",
    "Rates of death from illnesses have risen due to global warming.",
    "They had to wait a long time and play catch-up when checking in.",
    "Spending much time on Facebook affected her study",
    "China allows climbers of any age to climb Mt. Everest.",
    "Going to see a doctor.",
    "Planets can receive heat generated deep inside the planet.",
    "The rock becomes unclear.",
    "A book.",
    "Milk",
    "Maths.",
    "A bike.",
    "He helped the traffic police at a crossroad.",
    "The film You Are the Apple of My Eye.",
]

In [8]:
# Answer key of the generated quizzes, ordered GEQ1 .. GEQ20
geq_answers = [
    "Sorry.",
    "She disappeared from the spot where she was playing.",
    "You can have a beer in Germany or drink a hot dog in America.",
    "The future is uncertain.",
    "Connecting with native speakers.",
    "The early-warning system.",
    "Its beautiful beaches.",
    "He has serious doubts about it.",
    "It took a long time to open.",
    "About 83% of them.",
    "To wear a piece of rock from the top of the world.",
    "Poor nutrition.",
    "Water will exist underground as long as the planet is not too hot or too cold.",
    "Air movements in the earth's atmosphere.",
    "Because it is a way to relax themselves.",
    "Tea.",
    "Because he thought maths was interesting.",
    "August 5th.",
    "He helped others.",
    "He took drugs with some friends.",
]

# Section 8.1 - Grade Distributions

In [9]:
# Calculate the grade (integer, 0-10) for each participant
orq_columns = [f"ORQ{i}" for i in range(1, len(orq_answers) + 1)]
geq_columns = [f"GEQ{i}" for i in range(1, len(geq_answers) + 1)]


def _count_correct(given_answers, answer_key):
    """Return how many given answers equal the key entry at the same position.

    Answers are compared question by question. A set intersection would
    collapse repeated answers and would also accept an answer that happens to
    be the correct answer of a *different* question.
    """
    return sum(given == correct for given, correct in zip(given_answers, answer_key))


def _rounded_grade(n_correct, n_questions):
    """Return 10 * n_correct / n_questions rounded half up, as an int.

    Integer arithmetic for floor(10 * n_correct / n_questions + 1/2).
    The built-in round() rounds halves to the nearest even integer, which
    would send e.g. both 6.5 and 5.5 to 6 and distort the grade histogram.
    """
    return int((20 * n_correct + n_questions) // (2 * n_questions))


orq_grades = []
geq_grades = []

for idx, row in df.iterrows():
    given_orq_answers = row[orq_columns].tolist()
    given_geq_anwers = row[geq_columns].tolist()

    orq_grade = _rounded_grade(_count_correct(given_orq_answers, orq_answers), len(given_orq_answers))
    geq_grade = _rounded_grade(_count_correct(given_geq_anwers, geq_answers), len(given_geq_anwers))

    orq_grades.append(orq_grade)
    geq_grades.append(geq_grade)

In [10]:
# Histogram of the grades obtained on the original quizzes
fig, ax = plt.subplots()
ax.set_ylim([0, 24])
ax.set_yticks(np.arange(0, 24+1, 6))

# Bin edges at -0.5, 0.5, ..., 10.5: one bin centred on each grade 0..10
grade_bin_edges = np.arange(0, 10+2) - 0.5
bin_counts, _, _ = ax.hist(orq_grades, rwidth=.8, bins=grade_bin_edges)

ax.set_xticks(np.arange(0, 10+1, 1.0))
ax.set_xlabel('Grade')
ax.set_ylabel('Count')

# Print the count just above each bar
for bar, count in zip(ax.patches, [int(c) for c in bin_counts]):
    bar_centre = bar.get_x() + bar.get_width() / 2
    ax.text(bar_centre, bar.get_height()+0.25, count,
            ha='center', va='bottom')

fig.tight_layout()
fig.set_size_inches(w=3.2, h=2.2)

# Sub-figure caption placed below the axes
ax.set_title('(a) Original Quizzes', y=0, pad=-35, verticalalignment="top")

fig.savefig('figures/grade_distribution_original_quizzes.pgf')

In [11]:
# Histogram of the grades obtained on the generated quizzes
fig, ax = plt.subplots()
ax.set_ylim([0, 24])
ax.set_yticks(np.arange(0, 24+1, 6))

# Bin edges at -0.5, 0.5, ..., 10.5: one bin centred on each grade 0..10
grade_bin_edges = np.arange(0, 10+2) - 0.5
bin_counts, _, _ = ax.hist(geq_grades, rwidth=.8, bins=grade_bin_edges)

ax.set_xticks(np.arange(0, 10+1, 1.0))
ax.set_xlabel('Grade')
ax.set_ylabel('Count')

# Print the count just above each bar
for bar, count in zip(ax.patches, [int(c) for c in bin_counts]):
    bar_centre = bar.get_x() + bar.get_width() / 2
    ax.text(bar_centre, bar.get_height()+0.25, count,
            ha='center', va='bottom')

fig.tight_layout()
fig.set_size_inches(w=3.2, h=2.2)

# Sub-figure caption placed below the axes
ax.set_title('(b) EduQuiz-Generated Quizzes', y=0, pad=-35, verticalalignment="top")

fig.savefig('figures/grade_distribution_generated_quizzes.pgf')

# Section 8.2 - Unanswerable Quizzes

In [12]:
# Figure data
# x-axis: question IDs
# y-axis: number of participants per answer category (right / unanswerable / wrong)

UNANSWERABLE_OPTION = "This question is unanswerable."


def _tally_answers(column, correct_answer):
    """Count right, "unanswerable" and wrong answers in one column of `df`.

    Parameters
    ----------
    column : str
        Column label of the question in the module-level `df`.
    correct_answer : str
        Answer text that counts as right for this question.

    Returns
    -------
    tuple
        (right, unanswerable, wrong). `wrong` is every remaining row of `df`,
        so the three numbers always add up to len(df).
    """
    counts = df[column].value_counts()
    # .get(..., 0) covers options nobody selected without a bare `except`
    count_right_answers = counts.get(correct_answer, 0)
    count_unanswerable = counts.get(UNANSWERABLE_OPTION, 0)
    count_false = len(df) - count_right_answers - count_unanswerable
    return count_right_answers, count_unanswerable, count_false


orx_axis = [f"ORQ{i}" for i in range(1, 21)]
ory_axis = []
ory_axis_unans = []
ory_axis_false = []

gex_axis = [f"GEQ{i}" for i in range(1, 21)]
gey_axis = []
gey_axis_unans = []
gey_axis_false = []

for column, correct_answer in zip(orx_axis, orq_answers):
    n_right, n_unanswerable, n_wrong = _tally_answers(column, correct_answer)
    ory_axis.append(n_right)
    ory_axis_unans.append(n_unanswerable)
    ory_axis_false.append(n_wrong)

# The wrong-answer count is taken relative to the number of rows in `df`,
# exactly as for the original quizzes (not relative to the number of questions).
for column, correct_answer in zip(gex_axis, geq_answers):
    n_right, n_unanswerable, n_wrong = _tally_answers(column, correct_answer)
    gey_axis.append(n_right)
    gey_axis_unans.append(n_unanswerable)
    gey_axis_false.append(n_wrong)

In [13]:
# Bar chart: how often each original quiz was marked as unanswerable
fig, ax = plt.subplots()

ax.set_yticks(np.arange(0, 24+1, 6))
plt.xticks(rotation=45, ha='right')

ax.set_xlabel("Question ID")
ax.set_ylabel("Count")
bars = ax.bar(orx_axis, ory_axis_unans, width=0.8)

# One tick per question, labelled with the question ID
tick_positions = np.arange(len(orx_axis))
ax.set_xticks(tick_positions, orx_axis)
ax.set_yticks(np.arange(0, 24+1, 6))
ax.set_ylim([0, 24])

ax.tick_params(axis='x', labelrotation=45)

# Count shown above every bar
ax.bar_label(bars, padding=3)

fig.tight_layout()
fig.set_size_inches(w=6.0, h=2.2)

# Sub-figure caption placed below the axes
ax.set_title('(a) Original Quizzes', y=0, pad=-55, verticalalignment="top")

fig.savefig('figures/unanswerable_original_quizzes.pgf')

In [14]:
# Bar chart: how often each generated quiz was marked as unanswerable
fig, ax = plt.subplots()

ax.set_yticks(np.arange(0, 24+1, 6))
plt.xticks(rotation=45, ha='right')

ax.set_xlabel("Question ID")
ax.set_ylabel("Count")
bars = ax.bar(gex_axis, gey_axis_unans, width=0.8)

# One tick per question, labelled with the question ID
tick_positions = np.arange(len(gex_axis))
ax.set_xticks(tick_positions, gex_axis)
ax.set_yticks(np.arange(0, 24+1, 6))
ax.set_ylim([0, 24])

ax.tick_params(axis='x', labelrotation=45)

# Count shown above every bar
ax.bar_label(bars, padding=3)

fig.tight_layout()
fig.set_size_inches(w=6.0, h=2.2)

# Sub-figure caption placed below the axes
ax.set_title('(b) EduQuiz-generated Quizzes', y=0, pad=-55, verticalalignment="top")

fig.savefig('figures/unanswerable_generated.pgf')

# Section 8.3 - Filtering

In [15]:
# Calculate grades without rounding (float, 0-10 scale)
orq_columns = [f"ORQ{i}" for i in range(1, len(orq_answers) + 1)]
geq_columns = [f"GEQ{i}" for i in range(1, len(geq_answers) + 1)]

orq_grades = []
geq_grades = []

for idx, row in df.iterrows():
    given_orq_answers = row[orq_columns].tolist()
    given_geq_anwers = row[geq_columns].tolist()

    # Compare question by question: a set intersection would collapse repeated
    # answers and would accept an answer that is only correct for another question.
    orq_correct = sum(given == correct for given, correct in zip(given_orq_answers, orq_answers))
    geq_correct = sum(given == correct for given, correct in zip(given_geq_anwers, geq_answers))

    orq_grade = orq_correct / len(given_orq_answers) * 10
    geq_grade = geq_correct / len(given_geq_anwers) * 10

    orq_grades.append(orq_grade)
    geq_grades.append(geq_grade)

In [16]:
# Answer key of the generated quizzes that remain after filtering
filtered_geq_answers = [
    "She disappeared from the spot where she was playing.",
    "The future is uncertain.",
    "Connecting with native speakers.",
    "The early-warning system.",
    "He has serious doubts about it.",
    "To wear a piece of rock from the top of the world.",
    "Air movements in the earth's atmosphere.",
    "Because it is a way to relax themselves.",
    "Tea.",
    "August 5th.",
    "He helped others.",
    "He took drugs with some friends.",
]

In [17]:
# Calculate filtered grades without rounding (float, 0-10 scale)

# Columns of the quizzes kept after filtering, in the same order as
# `filtered_geq_answers`
filtered_geq_columns = ["GEQ2","GEQ4","GEQ5","GEQ6","GEQ8","GEQ11","GEQ14","GEQ15","GEQ16","GEQ18","GEQ19","GEQ20"]

filtered_geq_grades = []

for idx, row in df.iterrows():
    given_geq_anwers = row[filtered_geq_columns].tolist()
    # Compare question by question: a set intersection would collapse repeated
    # answers and would accept an answer that is only correct for another question.
    n_correct = sum(given == correct for given, correct in zip(given_geq_anwers, filtered_geq_answers))
    geq_grade = n_correct / len(given_geq_anwers) * 10
    filtered_geq_grades.append(geq_grade)

In [18]:
# Scatterplot: original-quiz grade vs. generated-quiz grade (all quizzes)
fig, ax = plt.subplots()

grade_axis = np.arange(0, 10+1, 1.0)
ax.set_xticks(grade_axis)
ax.set_yticks(grade_axis)

ax.set_xlabel('Original Quizzes Grade')
ax.set_ylabel('Generated Quizzes Grade')

# Correlation between the two grade lists and a degree-1 least-squares fit
p_corr, p_value = pearsonr(orq_grades, geq_grades)
intercept, slope = polyfit(orq_grades, geq_grades, 1)

ax.set_xlim([0, 10])
ax.set_ylim([0, 10])

ax.scatter(orq_grades, geq_grades)
ax.plot(grade_axis, intercept + slope * grade_axis, '-')

fig.tight_layout()
fig.set_size_inches(w=3.2, h=3.2)

# Caption with the statistics, placed below the axes
ax.set_title(f"(a) Unfiltered\n Pearson's correlation (r): {p_corr:.3f} \np-value: {p_value:.3f}", y=0, pad=-35, verticalalignment="top")

fig.savefig('figures/scatterplot_unfiltered.pgf')

In [19]:
# Scatterplot: original-quiz grade vs. generated-quiz grade (filtered quizzes)
fig, ax = plt.subplots()

grade_axis = np.arange(0, 10+1, 1.0)
ax.set_xticks(grade_axis)
ax.set_yticks(grade_axis)

ax.set_xlabel('Original Quizzes Grade')
ax.set_ylabel('Filtered Generated Quizzes Grade')

# Correlation between the two grade lists and a degree-1 least-squares fit
p_corr, p_value = pearsonr(orq_grades, filtered_geq_grades)
intercept, slope = polyfit(orq_grades, filtered_geq_grades, 1)

ax.set_xlim([0, 10])
ax.set_ylim([0, 10])

ax.scatter(orq_grades, filtered_geq_grades)
ax.plot(grade_axis, intercept + slope * grade_axis, '-')

fig.tight_layout()
fig.set_size_inches(w=3.2, h=3.2)

# Caption with the statistics, placed below the axes
ax.set_title(f"(b) Filtered\n Pearson's correlation (r): {p_corr:.3f} \np-value: {p_value:.3f}", y=0, pad=-35, verticalalignment="top")

fig.savefig('figures/scatterplot_filtered.pgf')

# Section 9.1 & 9.2 - General and Detailed analysis

In [20]:
def print_quiz_results(sampled_id, quiz_name):
    """Save a bar chart of the answers given to one quiz question.

    Reads the module-level `df`, counts how often each answer occurs in column
    `quiz_name`, and writes the chart to
    ``raw_quiz_result_plots/<quiz_name>.png``.

    Parameters
    ----------
    sampled_id
        Not used by this function; kept so existing calls keep working.
    quiz_name : str
        Column label in `df` (also used as the output file name).

    Returns
    -------
    None
    """
    # Count once; index = distinct answers, values = their frequencies
    answer_counts = df[quiz_name].value_counts()
    x_axis = answer_counts.index.tolist()
    y_axis = answer_counts.values.tolist()

    labels = x_axis

    x = np.arange(len(x_axis))  # the label locations
    width = 0.2  # the width of the bars

    fig, ax = plt.subplots(figsize=(6,6))
    rects1 = ax.bar(x_axis, y_axis, width)

    ax.set_ylabel('Count')
    ax.set_title('Guessed answers')
    ax.set_xticks(x, labels)
    ax.set_yticks(np.arange(0, 24+1, 6))
    ax.set_ylim([0, 24])
    plt.xticks(rotation=45, ha='right')

    ax.bar_label(rects1, padding=3)

    fig.tight_layout()

    # Make sure the output folder exists before writing into it
    os.makedirs('raw_quiz_result_plots', exist_ok=True)
    fig.savefig(f'raw_quiz_result_plots/{quiz_name}.png')

    # This function is called once per question; close the figure so that
    # the figures do not pile up in memory.
    plt.close(fig)

In [21]:
# Column labels ORQ1 .. ORQ20
original_labels = [f"ORQ{number}" for number in range(1, 21)]

# One answer chart per original quiz
for sampled_id, quiz_label in zip(sampled_idx, original_labels):
    print_quiz_results(sampled_id, quiz_label)

In [22]:
# Column labels GEQ1 .. GEQ20
generated_labels = [f"GEQ{number}" for number in range(1, 21)]

# One answer chart per generated quiz
for sampled_id, quiz_label in zip(sampled_idx, generated_labels):
    print_quiz_results(sampled_id, quiz_label)

# Generated Quiz 1 - unanswerable
19 times flagged as unanswerable

# Generated Quiz 2 - switch true/false answer
The most frequently chosen answer (21 times) is true but is not flagged as the correct answer. The flagged answer is also true, but it is not the only true option.

# Generated Quiz 3 - unanswerable
6 times flagged as unanswerable

# Generated Quiz 4 - good quiz (slight modification)
The answers given are quite divided. The distractors can be true

# Generated Quiz 5 - good quiz
18 people got them right. 6 chose a closely related distractor

# Generated Quiz 6 - switch true/false answer
Hard quiz. True/false answer should be switched

# Generated Quiz 7 - unanswerable - distractors are true
Distractors can be true.

# Generated Quiz 8 - good quiz (slight modification)
Viable answers. Distractor could be true

# Generated Quiz 9 - unanswerable - distractors are true
Unanswerable 10 times

# Generated Quiz 10 - unanswerable
One distractor has to be changed

# Generated Quiz 11 - switch true/false answer
True/false answer should be switched

# Generated Quiz 12 - unanswerable - distractors are true
Unanswerable 12 times

# Generated Quiz 13 - unanswerable
10 times unanswerable. Double distractors

# Generated Quiz 14 - good quiz
19 got right. 5 got wrong

# Generated Quiz 15 - good quiz
20 got right. 4 wrong

# Generated Quiz 16 - good quiz
19 got right

# Generated Quiz 17 - unanswerable - distractors are true
16 times unanswerable

# Generated Quiz 18 - good quiz (slight modification)
24 times good

# Generated Quiz 19 - switch true/false answer
good quiz. switch true/false answer

# Generated Quiz 20 - good quiz
21 times good.

# Summary
- Good Quiz:                       5
- Good Quiz Slight Modification:   3
- Good Quiz but switch true/false: 4
- Unanswerable Distractors true:   4
- Unanswerable:                    4

# Section 10.1 & 10.2 - Quiz Quality and AI in Education

In [23]:
def print_opinion_resultsh(sampled_id, quiz_name, opinion_options):
    """Save a horizontal bar chart of the answers to one opinion question.

    Reads the module-level `df` and `test_questions`. For every entry of
    `opinion_options` the number of rows in column `quiz_name` with exactly
    that answer is counted, and the chart is written to
    ``figures/<quiz_name>.pgf``.

    Parameters
    ----------
    sampled_id
        Not used by this function; kept so existing calls keep working.
    quiz_name : str
        Column label in `df`; also the key into `test_questions` (used as the
        caption) and the output file name.
    opinion_options : list of str
        Answer options, one bar each, in plotting order.

    Returns
    -------
    None
    """
    y_axis = opinion_options

    # Convert the column once instead of once per option
    given_answers = df[quiz_name].to_list()
    x_axis = [given_answers.count(opinion) for opinion in opinion_options]

    labels = y_axis

    # Abbreviated tick labels for the questions with long option texts
    if quiz_name == "OPQ5":
        labels = ["Never", "Someti", "AboHal", "Most", "Always"]

    if quiz_name == 'OPQ6':
        labels = ["Disrup", "Hindra", "Neutra", "NotBot", "DidnNo"]

    if quiz_name in ['OPQ9', 'OPQ10', 'OPQ11', 'OPQ12']:
        labels = ["StrDis", "SomDis", "NaNd", "SomAgr", "StrAgr"]

    y = np.arange(len(labels))
    bar_thickness = 0.5  # passed to barh as the third positional argument

    fig, ax = plt.subplots(figsize=(6,6))
    rects1 = ax.barh(y_axis, x_axis, bar_thickness)

    ax.set_xlabel('Count')
    ax.set_yticks(y, labels)
    ax.set_xticks(np.arange(0, 24+1, 6))
    ax.set_xlim([0, 24])

    ax.bar_label(rects1, padding=3)

    fig.tight_layout()
    fig.set_size_inches(w=3.2, h=2.5)

    # The question text is used as the caption below the axes
    ax.set_title(test_questions[quiz_name], y=0, pad=-40, verticalalignment="top", wrap=True)

    # Make sure the output folder exists before writing into it
    os.makedirs('figures', exist_ok=True)
    fig.savefig(f'figures/{quiz_name}.pgf')

    # Called once per question; close the figure so figures do not pile up
    plt.close(fig)

In [24]:
# Five-point agreement scale shared by the last four opinion questions
_agreement_scale = [
    "Strongly disagree",
    "Somewhat disagree",
    "Neither agree nor disagree",
    "Somewhat agree",
    "Strongly agree",
]

# Answer options per plotted opinion question (six lists, each its own object)
opinion_options = [
    ["Never", "Sometimes", "About half the time", "Most of the time", "Always"],
    [
        "Disruptive / I got very confused",
        "Hindrance / It slowed me down",
        "Neutral / I just picked the best answer",
        "Not bothered / Also human tests have imperfect quizzes",
        "Didn't notice / I only focused on answering the quizzes",
    ],
    list(_agreement_scale),
    list(_agreement_scale),
    list(_agreement_scale),
    list(_agreement_scale),
]

In [25]:
# Opinion questions that get their own figure
opinion_questions_labels = ['OPQ5', 'OPQ6', 'OPQ9', 'OPQ10', 'OPQ11', 'OPQ12']

# Figure captions per question, with manual line breaks
test_questions = {
    "OPQ5": "(a) How often did you recognize\nthat there was an AI-generated\nquiz during the test?\n",
    "OPQ6": "(b) One or more of the AI-\ngenerated quizzes may be\nimperfect. How did this affect\nyour overall test experience?",
    "OPQ9": "(a) AI can have a positive impact\non the educational domain.\n\n",
    "OPQ10": "(b) I would use Automatic Quiz\nGeneration to test my knowledge\nwhile learning.\n",
    "OPQ11": "(c) Automatic Quiz Generation\ncould help teachers to simplify\nthe process of creating tests.\n",
    "OPQ12": "(d) Research on AI-powered\neducation is important.\n\n",
}

In [26]:
# One chart per opinion question, paired with its list of answer options
for key, options in zip(opinion_questions_labels, opinion_options):
    print_opinion_resultsh(opinion_questions_dict[key], key, opinion_options=options)

# Section 10.3 - Open questions (in Latex template)

In [27]:
# Question text, then one LaTeX table row ("P<n> & answer \\") per participant
print(opinion_questions_dict["OPQ7"], '\n')
for participant_no, response in enumerate(df["OPQ7"], start=1):
    print(f'P{participant_no} &', response, '\\\\')

Name three strengths of the AI generated quizzes 

P1 & The questions were mostly specific, often the answer was just in the text, and the questions were often gramatically correct \\
P2 & Not superficial... \\
P3 & Releative clear stories compared to the human generated ones. \\
P4 & Easy, quick and low cost to make \\
P5 & Cheap, no human intervention, scalable \\
P6 & It’s easier to make, it’s faster and a lot of quizzes can be made in a short time period \\
P7 & Fact based, efficiënt, high level questions \\
P8 & Convenient, interesting, \\
P9 & understandable, clear, good \\
P10 & - \\
P11 & Grammar, structure & level \\
P12 & Specific questions, answer not that hard to find, clear paragraphs \\
P13 & Fast, reliable and creative \\
P14 & Often realistic, similar too human quizes, double anwsers \\
P15 & Ease, uniqueness, bias \\
P16 & No spelling mistakes, Logical answering options, No clear wrong answers (similarity in answers is good) \\
P17 & Less time needed for human interfer

In [28]:
# Question text, then one LaTeX table row ("P<n> & answer \\") per participant
print(opinion_questions_dict["OPQ8"], '\n')
for participant_no, response in enumerate(df["OPQ8"], start=1):
    print(f'P{participant_no} &', response, '\\\\')

Name three limitations of the AI generated quizzes 

P1 & Some questions were vague, some questions were identical, some questions had the same answer multiple times \\
P2 & Not always answerable, too vague,  \\
P3 & The answer often contained the same words as the text, to easy to search. \\
P4 & High chance of questions that dont make sense, the AI does not take into account what is the key lesson that the student should be tested on, it just replicates the tests it was trained on. Teachers have better insight in what they want the test to test. \\
P5 & Double answers, incorrect answers, no explanation \\
P6 & The quizzes can have some errors, the AI might misunderstand the texts and when a lot of quizzes are generated it is hard to check them \\
P7 & Limitation of interpretation, difficulty emotion based questions, need to review at beginning of learning cycle \\
P8 & Not able to read context, no proper language, same questions asked \\
P9 & long and much \\
P10 & - \\
P11 & Answers

In [29]:
# Question text, then one LaTeX table row ("P<n> & answer \\") per participant
print(opinion_questions_dict["OPQ13"], '\n')
for participant_no, response in enumerate(df["OPQ13"], start=1):
    print(f'P{participant_no} &', response, '\\\\')

List three reasons why we would support research into AI-powered education. 

P1 & It takes away a lot of time for teachers, it can also help students by generating endless quizes to practice, and can unlock all the texts on the internet \\
P2 & Possbility to bring higher level education to more people easily,... \\
P3 & nan \\
P4 & There are various risks involved, these should be studied thoroughly. It might reduce the work teachers need to do. It might help students prepare for tests  \\
P5 & Makes it easier to provide education in less populated regions, makes it easier to create more educational content, might get better than humans at some point \\
P6 & It can help improve the overall education and it can help teachers giving beter education \\
P7 & Efficiency, accuracy, from reseach point of view \\
P8 & nan \\
P9 & fast, improvement, future \\
P10 & - \\
P11 &   Studying opportunities, easy to crrate exams for teachers and maybe to be used to create online learning for third wo

In [30]:
# Question text, then one LaTeX table row ("P<n> & answer \\") per participant
print(opinion_questions_dict["OPQ14"], '\n')
for participant_no, response in enumerate(df["OPQ14"], start=1):
    print(f'P{participant_no} &', response, '\\\\')

Do you see any ethical or societal risks, or other barriers to successful deployment of AI in education? 

P1 & Using all texts on the internet might also come with risks so safeguards need to be in place to prevent harmful language. I think as we currently stand, there still needs to be a human to check the texts and answers to make sure that it is all correct and questions are answerable \\
P2 & Bias in data, even more than human bias of teachers currently \\
P3 & nan \\
P4 & Yes, it can be hard to make sure that an AI tests the right knowledge. It is also hard to determine what tests are good and which are not (in terms of what data to train on), prevent racism or other things that might occur in tests created by humans. \\
P5 & Possibly no/incomplete explanation of the answer, bias in training data leads to bias in created content \\
P6 & When the AI malfunctions it can have ethical or societal risks \\
P7 & No \\
P8 & nan \\
P9 & No, but I would always have the teacher check it. \