In [1]:
# Imports
import pandas as pd
import numpy as np
from transformers import pipeline
from datasets import Dataset
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# NOTE(review): absolute, machine-specific path; consider deriving it from a
# configurable data directory so the notebook runs on other machines.
DATAPATH = "/home/kaariaa3/mscthesis/data/out.csv"

# Persona / style instructions for the reviewing model. The word
# "introductory" used to be split as "in-\ntroductory" (a hyphenation artefact
# from copy-pasting), which put a broken word into the prompt text.
SYSTEM_PROMPT = """I want you to act as a programming teacher for an introductory
Dart course. Your students are programming
novices. I will provide some coding example exercises,
and it will be your job to critique them. Your responses 
should be written in simple English. Do not cite music 
lyrics or books. Do not include any greetings, be concise. 
Do not mention trigger words associated with mental or 
physical disorders, for example, weight loss or diet."""

# Prompt skeleton for critiquing one exercise. The $THEME$, $TOPIC$, $CONCEPT$,
# $TEXT$ and $CODE$ markers are placeholders that make_prompt() fills in with
# plain str.replace calls; everything else is sent verbatim.
CRITIQUE_TEMPLATE = """You are evaluating a programming exercise.

Intended theme: $THEME$
Intended topic: $TOPIC$
Intended programming concept: $CONCEPT$

Exercise description:
$TEXT$

Example solution:
$CODE$

Determine whether the exercise follows each requirement:

1. Theme
2. Topic
3. Programming concept

If it fails any requirement, explain WHY in detail.
Focus on concrete mismatches between instructions and content.
"""

# Experimental step-by-step variant of the critique instructions.
# It contains no placeholders and is not referenced by the visible cells.
TEMPLATE2 = """Analyze the exercise step by step.
Step 1 — Identify what the exercise is actually about.
Step 2 — Check alignment with the theme.
Step 3 — Check alignment with the topic.
Step 4 — Check alignment with the programming concept.
Step 5 — Provide a final explanation of failures.
"""

def make_prompt(row):
    """Fill CRITIQUE_TEMPLATE with the fields of one exercise.

    Parameters
    ----------
    row : pandas.Series, mapping, tuple or list
        One exercise record. Label-indexed rows (what
        ``DataFrame.apply(..., axis=1)`` passes) are read by column name:
        ``topic``, ``theme``, ``concept``, ``problemDescription`` and
        ``exampleSolution``. Plain tuples/lists keep the legacy positional
        layout ``(_, topic, theme, concept, description, solution, ...)``.

    Returns
    -------
    str
        The critique prompt with the ``$THEME$``, ``$TOPIC$``, ``$CONCEPT$``,
        ``$TEXT$`` and ``$CODE$`` placeholders replaced.

    Raises
    ------
    KeyError
        If a label-indexed row lacks one of the required columns.
    """
    if isinstance(row, (tuple, list)):
        # Legacy positional form, kept for callers that pass bare sequences.
        _, topic, theme, concept, problem_description, example_solution, *_ = row
    else:
        # Look the fields up by name instead of by position. The frame preview
        # shows "Unnamed: 0" and "title" ahead of "topic", so positional
        # unpacking shifts every field by one (title used as topic, topic used
        # as theme, ...) without raising any error.
        topic = row["topic"]
        theme = row["theme"]
        concept = row["concept"]
        problem_description = row["problemDescription"]
        example_solution = row["exampleSolution"]

    return (
        CRITIQUE_TEMPLATE.replace("$THEME$", theme)
        .replace("$TOPIC$", topic)
        .replace("$CONCEPT$", concept)
        .replace("$TEXT$", problem_description)
        .replace("$CODE$", example_solution)
    )


# Dummy function
def run_model(data):
    """Placeholder for the real model call.

    Writes the fixed string "test" into the "Correct" and "Explanation"
    entries of ``data`` (in place) and returns the same object.
    """
    for field in ("Correct", "Explanation"):
        data[field] = "test"
    return data

In [3]:
# Read the semicolon-delimited exercise table, then preview its first rows.
df = pd.read_csv("../../data/out.csv", delimiter=";")
df.head()

Unnamed: 0,title,topic,theme,concept,problemDescription,exampleSolution,The exercise description matched the selected theme (Yes/Partially/No),The exercise description matched the selected topic (Yes/Partially/No),The exercise description matched the selected concept (Yes/No),All metrics are correct (Yes/No)
0,Wildlife Spotting,wildlife spotting,outdoor activities,user input,Write a program that asks the user for their f...,"{'code': ""import 'dart:io';main() { print('Wh...",yes,yes,yes,yes
1,Freeze Dance Game!,Freeze Dance,party games,user input,Write a program that asks the user if they wan...,"{'code': ""import 'dart:io';main() { print('Do...",yes,yes,yes,yes
2,Rye Bread Order,Rye bread,food,user input,Write a program that asks the user for their n...,"{'code': ""import 'dart:io';main() { print('Wh...",yes,yes,yes,yes
3,Tower of London!,Tower of London,historical landmarks,user input,Write a program that asks the user for their f...,"{'code': ""import 'dart:io';main() { print('Wh...",yes,no,yes,no
4,Holiday Movies,watching holiday movies,Christmas,program output,Write a program that asks the user for their f...,"{'code': ""import 'dart:io';main() { print('Wh...",no,yes,yes,no


In [4]:
# The last column of the loaded table is the overall verdict
# ("All metrics are correct (Yes/No)" in the preview).
label_col = df.columns[-1]
cond = df[label_col] == "no"

# Exercises whose overall verdict is "no".
wrongs = df[cond].copy()

# Evaluate every exercise; assign `wrongs.copy()` instead to evaluate only the
# failed ones. A copy is used on purpose: a plain alias (`eval_df = df`) lets
# columns added to `eval_df` later appear on `df` too, and re-running this cell
# would then pick that new column as `label_col` and select nothing.
eval_df = df.copy()

In [5]:
# Build one critique prompt per exercise row and store it in a "prompt" column.
print("Creating prompts...")
eval_df["prompt"] = eval_df.apply(func=make_prompt, axis=1)

# Convert the frame to a datasets.Dataset and pass every example through run_model.
dataset = Dataset.from_pandas(eval_df).map(run_model)

Creating prompts...


Map: 100%|██████████| 33/33 [00:00<00:00, 4522.83 examples/s]


In [6]:
# Show the first example of the mapped dataset to inspect its fields.
dataset[0]

{'title': 'Wildlife Spotting',
 'topic': 'wildlife spotting',
 'theme': 'outdoor activities',
 'concept': 'user input',
 'problemDescription': 'Write a program that asks the user for their favorite animal to spot in the wild. After this, the program prints a message to the user ‘I hope you see a/an animal!’, where animal is the animal entered by the user. For example, with the input `Eagle`, the program output is as follows:\n\n```\nWhich is your favorite animal to spot in the wild?\nEagle\nI hope you see a Eagle!\n```\n\nSimilarly, if the user enters the animal `Deer`, the program output is as follows:\n\n```\nWhich is your favorite animal to spot in the wild?\nDeer\nI hope you see a Deer!\n```',
 'exampleSolution': '{\'code\': "import \'dart:io\';main() {  print(\'Which is your favorite animal to spot in the wild?\');  var animal = stdin.readLineSync();  print(\'I hope you see a $animal!\');}"}',
 'The exercise description matched the selected theme (Yes/Partially/No)': 'yes',
 'The 