In [42]:
# Imports
import pandas as pd
import numpy as np

In [43]:
# Definitions
# Workbook to read, and the names of the sheets requested from it.
filename = "int_agreement (cleaned).xlsx"
sheet_names = [
    "Arto",
    "Evanfiya",
    "James",
    "Juho",
    "Sami",
]


def load_spreadsheet(filepath):
    """Read the sheets listed in ``sheet_names`` from an Excel workbook.

    Parameters
    ----------
    filepath : str or path-like
        Location of the workbook to read.

    Returns
    -------
    dict
        Whatever ``pd.read_excel`` returns for a list of sheet names
        (per the pandas documentation, a dict mapping each requested
        sheet name to its DataFrame).
    """
    # Use the argument rather than the module-level ``filename`` so the
    # function actually honours the path it is given.
    sheets = pd.read_excel(filepath, sheet_name=sheet_names)
    return sheets

def label_data(df, slicer, predicate, label="label", use_all=True):
    """Build a per-row "yes"/"no" Series from a positional slice of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the data to evaluate.
    slicer
        Positional indexer handed to ``df.iloc[...]``.
    predicate : callable
        Passed to ``DataFrame.apply`` on the selected data; presumably
        returns a boolean Series per column (verify against callers).
    label : str, default "label"
        Name assigned to the returned Series.
    use_all : bool, default True
        If true, a row is "yes" only when every value in it is truthy
        (``all``); otherwise one truthy value is enough (``any``).

    Returns
    -------
    pandas.Series
        "yes"/"no" per row, named ``label``.
    """

    def _to_yes_no(flag):
        # Translate a truthy/falsy row result into the sheet's vocabulary.
        return "yes" if flag else "no"

    if use_all:
        reducer = all
    else:
        reducer = any

    selected = df.iloc[slicer]                    # relevant columns/rows only
    flags = selected.apply(predicate)             # predicate per column
    row_flags = flags.apply(reducer, axis=1)      # collapse each row to one value
    labels = row_flags.apply(_to_yes_no)          # back to yes/no strings
    labels.name = label
    return labels

# Filters
def is_null(x):
    """Return ``x.isnull()``: the missing-value mask of a pandas object."""
    return x.isnull()

In [44]:
# Read the requested sheets, then stack them row-wise into one long frame.
sheets = load_spreadsheet(filename)
stacked = pd.concat(sheets.values(), axis=0)

# Discard rows whose title is missing. The row index coming from each sheet is
# deliberately kept (no reset_index): the aggregation cell groups on it.
df = stacked.dropna(axis=0, subset="title")

In [45]:
# Positional bounds (start inclusive, end exclusive) of the rating columns that
# are aggregated across sheets.
slice_s = 11
slice_e = 14
eval_cols = df.columns[slice_s:slice_e]

# A metric counts as "yes" only when every loaded sheet said "yes" for that row.
# Derive the required count from the loaded sheets instead of hard-coding 5.
n_raters = len(sheets)

# Rows from different sheets share the same index label, so grouping on the
# index sums the "yes" votes per row position.
yes_df = (
    df[eval_cols]
    .eq("yes")
    .astype(int)
    .groupby(df.index)
    .sum()
    .eq(n_raters)
    .map(lambda x: "yes" if x else "no")
)

# NOTE: this cell rebinds `df`; re-run the loading cell before re-running this
# one, otherwise the votes are counted on already-aggregated rows.
# Explicit copy so the column assignment below writes to an independent frame.
df = df.drop_duplicates("title").copy()
df[eval_cols] = yes_df[eval_cols]

df.head()

Unnamed: 0,title,problemDescription,exampleSolution,starterCode,tests,theme,topic,concept,difficulty,id,The exercise description was clear (Yes/Partially/No),The exercise description matched the selected theme (Yes/Partially/No),The exercise description matched the selected topic (Yes/Partially/No),The exercise description matched the selected concept (Yes/No),Included concepts that were too advanced (Yes/No)\n,The exercise difficulty matched the selected difficulty (Too easy/Okay/Too difficult),Shallow vs deep personalization (Deep/Shallow/Unsure),Open field
0,Wildlife Spotting,Write a program that asks the user for their f...,"{'code': ""import 'dart:io';main() { print('Wh...","{'code': ""import 'dart:io';main() {}""}","{'testCode': ""import 'package:test/test.dart';...",outdoor activities,wildlife spotting,user input,advanced,-35866480000000.0,partially,yes,yes,yes,no,okay,shallow,"The handout had ""I hope you see a/an animal"", ..."
1,Freeze Dance Game!,Write a program that asks the user if they wan...,"{'code': ""import 'dart:io';main() { print('Do...","{'code': ""import 'dart:io';main() {}""}","{'testCode': ""import 'package:test/test.dart';...",party games,Freeze Dance,user input,normal,-29922540000000.0,yes,yes,yes,yes,yes,too difficult,shallow,Requires knowledge of conditionals.
2,Rye Bread Order,Write a program that asks the user for their n...,"{'code': ""import 'dart:io';main() { print('Wh...","{'code': ""import 'dart:io';main() {}""}","{'testCode': ""import 'package:test/test.dart';...",food,Rye bread,user input,advanced,170229800000000.0,yes,yes,yes,yes,no,okay,unsure,
3,Tower of London!,Write a program that asks the user for their f...,"{'code': ""import 'dart:io';main() { print('Wh...","{'code': ""import 'dart:io';main() {}""}","{'testCode': ""import 'package:test/test.dart';...",historical landmarks,Tower of London,user input,normal,674498100000000.0,yes,yes,no,yes,no,okay,shallow,
4,Holiday Movies,Write a program that asks the user for their f...,"{'code': ""import 'dart:io';main() { print('Wh...","{'code': ""import 'dart:io';main() {}""}","{'testCode': ""import 'package:test/test.dart';...",Christmas,watching holiday movies,program output,normal,-49789220000000.0,yes,no,yes,yes,no,okay,shallow,


In [46]:
# Add one overall column that is "yes" only when every aggregated metric is "yes".
label = "All metrics are correct (Yes/No)"

# All rows, rating columns only (equivalent to np.s_[:, slice_s:slice_e]).
slice_obj = (slice(None), slice(slice_s, slice_e))


def pred(s):
    """Case-insensitive test of which entries of a string Series equal "yes"."""
    return s.str.lower() == "yes"


overall_labels = label_data(df, slice_obj, pred, label)
result2 = pd.concat([df, overall_labels], axis=1)

In [47]:
# Column positions to summarise: the rating columns plus the last column of result2.
stats = list(range(slice_s, slice_e)) + [len(result2.columns) - 1]

n_rows = len(result2)
count_yes = result2.iloc[:, stats].eq("yes").sum()
count_no = n_rows - count_yes
percentages = count_yes / n_rows

pd.DataFrame(
    data={
        "Correct (Yes)": count_yes,
        "Incorrect (Partially/No)": count_no,
        "Percentage of correct": percentages,
    }
)

Unnamed: 0,Correct (Yes),Incorrect (Partially/No),Percentage of correct
The exercise description matched the selected theme (Yes/Partially/No),31,2,0.939394
The exercise description matched the selected topic (Yes/Partially/No),28,5,0.848485
The exercise description matched the selected concept (Yes/No),29,4,0.878788
All metrics are correct (Yes/No),24,9,0.727273


In [48]:
# Write the selected columns to a semicolon-separated file, without the index.
export_columns = [
    "title",
    "topic",
    "theme",
    "concept",
    "problemDescription",
    "exampleSolution",
    *eval_cols,
    label,
]
result2.to_csv("out.csv", sep=";", columns=export_columns, index=False)