# GradingUtility.ipynb
> A notebook of utilities for configuring grading and loading student responses

In this notebook, we create some base functionality for grading and uploading student responses in a simplified, unified way.

:::{.callout-caution}
These notebooks are development notebooks, meaning that they are meant to be run locally or somewhere that supports navigating a full repository (in other words, not Google Colab, unless you clone the entire repository to Drive and then mount the Drive repository). However, if you are able to do all of those steps, you are likely also able to figure out the required pip installs for development there.
:::


In [None]:
#| default_exp GradingUtility

In [None]:
#| export
import ipywidgets as widgets
from IPython.display import display, HTML
import io
import zipfile
import os
import json
import pandas as pd
import glob

In [None]:
#| export
# "global" variables modified by mutability
# Shared settings dictionary. It is mutated in place (never rebound), so every
# reference to it observes the values written by the configuration form.
# All three entries start out as None.
grade_settings = dict.fromkeys(
    ('learning_objectives', 'json_file_path', 'json_files')
)

In [None]:
#| export
class InstructorGradingConfig:
    """Widget form that collects grading inputs from an instructor.

    The form consists of a learning-objectives text area, a zip upload control,
    a submit button and a status output area. Pressing the button runs
    ``_setup_environment``, which stores the entered values in the module-level
    ``grade_settings`` dictionary, extracts the uploaded archive and records the
    JSON files found under the extracted folder.
    """

    def __init__(self):
        """Create the persistent widgets, wire the button and build the inputs."""
        # layouts to help with styling
        self.items_layout = widgets.Layout(width='auto')

        self.box_layout = widgets.Layout(display='flex',
                                          flex_flow='column',
                                          align_items='stretch',
                                          width='50%',
                                          border='solid 1px gray',
                                          padding='0px 30px 20px 30px')

        # Create all components
        self.ui_title = widgets.HTML(value="<h2>Instructor Grading Configuration</h2>")

        self.run_button = widgets.Button(description='Submit', button_style='success', icon='check')
        self.status_output = widgets.Output()
        self.status_output.append_stdout('Waiting...')

        # Setup click behavior
        self.run_button.on_click(self._setup_environment)

        # Reset rest of state
        self.reset_state()

    def reset_state(self, close_all=False):
        """Create fresh input widgets and rebuild the container around them.

        Args:
            close_all: when True, close the current text area, upload control
                and upload row before replacing them. Only valid once those
                widgets exist, i.e. after the first call made by ``__init__``.
        """
        if close_all:
            self.learning_objectives_text.close()
            self.file_upload.close()
            self.file_upload_box.close()

        self.learning_objectives_text = widgets.Textarea(value='', description='Learning Objectives',
                                                         placeholder='Learning objectives: 1. Understand and implement classes in object-oriented programming',
                                                         layout=self.items_layout,
                                                         style={'description_width': 'initial'})
        self.file_upload = widgets.FileUpload(
            accept='.zip',  # Accepted file extension e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf'
            multiple=False  # True to accept multiple files upload else False
        )
        self.file_upload_box = widgets.HBox([widgets.Label('Upload User Files:\t'), self.file_upload])

        # Create a VBox container to arrange the widgets vertically
        self.ui_container = widgets.VBox([self.ui_title, self.learning_objectives_text,
                                           self.file_upload_box, self.run_button, self.status_output],
                                          layout=self.box_layout)

    def _setup_environment(self, btn):
        """Button callback: store the inputs, unpack the upload and reset the form.

        Args:
            btn: the clicked button, supplied by ``on_click``; not used.
        """
        uploads = self.file_upload.value
        grade_settings['learning_objectives'] = self.learning_objectives_text.value
        grade_settings['json_file_path'] = uploads

        # Stays None unless an archive was unpacked; the final summary is
        # printed only in that case so it never refers to an unset name.
        extracted_files = None

        self.status_output.clear_output()
        if uploads:
            # The upload value is either a dict keyed by file name or a
            # sequence of dicts that carry their own 'name' entry.
            if isinstance(uploads, dict):
                zip_name, input_file = next(iter(uploads.items()))
            else:
                input_file = uploads[0]
                zip_name = input_file['name']
            # Folder expected inside the archive: the file name minus its extension
            extracted_zip_dir = os.path.splitext(zip_name)[0]

            self.status_output.append_stdout('Loading zip file...\n')

            try:
                # extractall() with no path unpacks into the current working directory
                with zipfile.ZipFile(io.BytesIO(input_file['content']), "r") as z:
                    z.extractall()
                    extracted_files = z.namelist()
            except zipfile.BadZipFile as err:
                self.status_output.append_stdout('Error: the uploaded file is not a valid zip archive ({0})\n'.format(err))
            else:
                self.status_output.append_stdout('Extracted files and directories: {0}\n'.format(', '.join(extracted_files)))

                # load all json files
                grade_settings['json_files'] = glob.glob(''.join([extracted_zip_dir, '/**/*.json']), recursive=True)

                self.status_output.append_stdout('Loading successful!\nLearning Objectives: {0}\nExtracted JSON files: {1}'.format(grade_settings['learning_objectives'],
                                                                                                        ', '.join(grade_settings['json_files'])))
        else:
            self.status_output.append_stdout('Please upload a zip file.')

        # Clear values so they're not saved
        self.learning_objectives_text.value = ''
        self.reset_state(close_all=True)
        self.run_ui_container()

        if extracted_files is not None:
            with self.status_output:
                print('Extracted files and directories: {0}\n'.format(', '.join(extracted_files)))
                print('Loading successful!\nLearning Objectives: {0}\nExtracted JSON files: {1}'.format(grade_settings['learning_objectives'],
                                                                                                        ', '.join(grade_settings['json_files'])))
                print('Submitted and Reset all values.')

    def run_ui_container(self):
        """Display the form, replacing the current cell output."""
        display(self.ui_container, clear=True)

In [None]:
import unittest
import io
import zipfile
import glob
from unittest.mock import patch

class TestInstructorGradingConfig(unittest.TestCase):
    """unittest cases for InstructorGradingConfig, run inside the notebook."""

    def setUp(self):
        """Build a fresh configuration object for every test."""
        self.grading_config = InstructorGradingConfig()

    def test_reset_state(self):
        """After reset_state the inputs are empty and the container is a VBox."""
        self.grading_config.reset_state()
        self.assertEqual(self.grading_config.learning_objectives_text.value, '')
        # NOTE(review): assumes an empty FileUpload value compares equal to {};
        # confirm against the installed ipywidgets version.
        self.assertEqual(self.grading_config.file_upload.value, {})
        self.assertIsInstance(self.grading_config.ui_container, widgets.VBox)

    def test_setup_environment_no_file(self):
        """Submitting without an upload should ask for a zip file."""
        # NOTE(review): _setup_environment reports through
        # status_output.append_stdout; confirm that text reaches the patched
        # sys.stdout at all.
        with patch('sys.stdout', new=io.StringIO()) as fake_stdout:
            self.grading_config._setup_environment(None)
            self.assertEqual(fake_stdout.getvalue().strip(), 'Please upload a zip file.')
        self.assertEqual(self.grading_config.status_output.outputs[0]['text'], 'Please upload a zip file.')

    def test_setup_environment_invalid_file(self):
        """Submitting bytes that are not a zip archive."""
        # The value is assigned directly to the widget trait, so it has to pass
        # the widget's own validation before _setup_environment is reached.
        self.grading_config.file_upload.value = {'invalid_file': {'content': b'invalid content'}}
        with patch('sys.stdout', new=io.StringIO()) as fake_stdout:
            self.grading_config._setup_environment(None)
            self.assertEqual(fake_stdout.getvalue().strip(), 'Loading zip file...')
        self.assertEqual(self.grading_config.status_output.outputs[0]['text'], 'Loading zip file...\n')

    def test_setup_environment_valid_file(self):
        """Submitting an upload that is expected to contain valid_file.json."""
        # NOTE(review): the payload is a plain byte string rather than an
        # archive built with zipfile; confirm zipfile.ZipFile can open it.
        self.grading_config.file_upload.value = {'valid_file.zip': {'content': b'valid zip content'}}
        with patch('sys.stdout', new=io.StringIO()) as fake_stdout:
            self.grading_config._setup_environment(None)
            self.assertEqual(fake_stdout.getvalue().strip(), 'Loading zip file...\nExtracted files and directories: valid_file.json\nLoading successful!\nLearning Objectives: \nExtracted JSON files: valid_file.json')
        self.assertEqual(self.grading_config.status_output.outputs[0]['text'], 'Loading zip file...\nExtracted files and directories: valid_file.json\nLoading successful!\nLearning Objectives: \nExtracted JSON files: valid_file.json')

    def test_run_ui_container(self):
        """run_ui_container should call display with the container and clear=True."""
        # NOTE(review): the notebook imports `display` by name
        # (from IPython.display import display, HTML), so patching the
        # attribute on the IPython.display module may not intercept the call
        # made inside run_ui_container; confirm the patch target.
        with patch('IPython.display.display') as mock_display:
            self.grading_config.run_ui_container()
            mock_display.assert_called_once_with(self.grading_config.ui_container, clear=True)

    def tearDown(self):
        """Drop the configuration object created in setUp."""
        del self.grading_config

# Run the tests in place: a placeholder argv keeps unittest from parsing the
# host process's own command-line arguments, and exit=False stops unittest from
# calling sys.exit when the run finishes.
if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

VBox(children=(HTML(value='<h2>Instructor Grading Configuration</h2>'), Textarea(value='', description='Learni…

FEEE
ERROR: test_setup_environment_invalid_file (__main__.TestInstructorGradingConfig)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_69693/2558061482.py", line 24, in test_setup_environment_invalid_file
    self.grading_config.file_upload.value = {'invalid_file': {'content': b'invalid content'}}
  File "/usr/local/lib/python3.10/dist-packages/traitlets/traitlets.py", line 732, in __set__
    self.set(obj, value)
  File "/usr/local/lib/python3.10/dist-packages/traitlets/traitlets.py", line 706, in set
    new_value = self._validate(obj, value)
  File "/usr/local/lib/python3.10/dist-packages/traitlets/traitlets.py", line 738, in _validate
    value = self.validate(obj, value)
  File "/usr/local/lib/python3.10/dist-packages/traitlets/traitlets.py", line 2867, in validate
    value = super().validate(obj, value)
  File "/usr/local/lib/python3.10/dist-packages/traitlets/traitlets.py", line 2151, in validate
 

`set_css`

In [None]:
#| export
def set_css(info=None):
  """Inject a style rule that makes <pre> blocks wrap long lines.

  Args:
    info: optional value supplied by the IPython event system when this
      function runs as a 'pre_run_cell' callback; it is ignored. Calling
      set_css() with no argument keeps working.
  """
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))


# Re-apply the style before every cell. get_ipython only exists inside an
# IPython session, so importing the exported module from plain Python must not
# fail on it.
try:
  _shell = get_ipython()
except NameError:
  _shell = None
if _shell is not None:
  _shell.events.register('pre_run_cell', set_css)

Test for `set_css`

In [None]:
def test_set_css():
    """Smoke test: call set_css once and print whether it raised."""
    try:
        set_css()
    except Exception as err:
        print(f"set_css() test failed: {err}")
    else:
        print("set_css() test passed.")

test_set_css()

set_css() test passed.


`clean_keys`

In [None]:
#| export
def clean_keys(data):
    """Return a copy of *data* with surrounding whitespace stripped.

    *data* is an iterable of dictionaries. Every key is stripped; values are
    stripped only when they are strings and passed through unchanged
    otherwise. The input dictionaries are not modified.
    """
    def _tidy(value):
        # Non-string values (numbers, lists, None, ...) are kept as they are
        return value.strip() if isinstance(value, str) else value

    return [
        {key.strip(): _tidy(value) for key, value in record.items()}
        for record in data
    ]

Tests for `clean_keys`

In [None]:
def test_clean_keys():
    """Check clean_keys on well-formed inputs and on a non-list input."""
    cases = [
        # a single dictionary
        ([{" foo ": " bar ", " baz ": " qux "}],
         [{"foo": "bar", "baz": "qux"}]),
        # multiple dictionaries
        ([{" foo ": " bar ", " baz ": " qux "},
          {" spam ": " eggs ", " ham ": " bacon "}],
         [{"foo": "bar", "baz": "qux"},
          {"spam": "eggs", "ham": "bacon"}]),
        # empty input
        ([], []),
    ]
    for raw, expected in cases:
        assert clean_keys(raw) == expected

    # Input that is not a list of dictionaries must be rejected
    rejected = False
    try:
        clean_keys("not a list of dictionaries")
    except AttributeError:
        rejected = True
    if not rejected:
        raise AssertionError("Expected AttributeError to be raised")

test_clean_keys()

`file_upload_json_to_df`

In [None]:
#| export
def file_upload_json_to_df(upload_json):
  """Convert an uploaded JSON file into a DataFrame.

  Args:
    upload_json: mapping of file name to a dict that holds the raw file under
      the 'content' key. Only the first entry is read.

  Returns:
    A pandas DataFrame with one row per JSON record, built after keys and
    string values have been stripped by clean_keys.
  """
  # Only the first uploaded file is used
  fname = list(upload_json.keys())[0]
  content = upload_json[fname]['content']

  # json.loads accepts str, bytes and bytearray, but not memoryview
  # (presumably what some upload widgets deliver; verify against the caller).
  if isinstance(content, memoryview):
    content = content.tobytes()

  # strict=False permits control characters such as raw newlines inside strings
  loaded_json = json.loads(content, strict=False)

  # A single top-level object is treated as one record, matching how
  # load_json_as_df wraps a lone dictionary in a list.
  if isinstance(loaded_json, dict):
    loaded_json = [loaded_json]

  # clean the keys if needed
  loaded_json = clean_keys(loaded_json)

  return pd.DataFrame(loaded_json)

Tests for `file_upload_json_to_df`

In [None]:
def test_file_upload_json_to_df():
    """Exercise file_upload_json_to_df with several JSON shapes.

    Each case builds an upload mapping of the form
    {file name: {"content": JSON text}} and compares the result with a
    hand-built DataFrame using DataFrame.equals.
    """
    # Test case 1: Test with a simple JSON file
    # NOTE(review): the content is a single JSON object rather than an array;
    # confirm file_upload_json_to_df supports that shape.
    test_json = {"data.json": {"content": '{"name": "John", "age": 30, "city": "New York"}'}}
    expected_output = pd.DataFrame({"name": ["John"], "age": [30], "city": ["New York"]})
    assert file_upload_json_to_df(test_json).equals(expected_output)

    # Test case 2: Test with a JSON file containing nested objects
    # NOTE(review): the expected columns are flattened with an underscore
    # (address_city, address_state); confirm the function flattens nested
    # objects and uses that separator.
    test_json = {"data.json": {"content": '{"name": "John", "age": 30, "address": {"city": "New York", "state": "NY"}}'}}
    expected_output = pd.DataFrame({"name": ["John"], "age": [30], "address_city": ["New York"], "address_state": ["NY"]})
    assert file_upload_json_to_df(test_json).equals(expected_output)

    # Test case 3: Test with a JSON file containing an array of objects
    test_json = {"data.json": {"content": '[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]'}}
    expected_output = pd.DataFrame({"name": ["John", "Jane"], "age": [30, 25]})
    assert file_upload_json_to_df(test_json).equals(expected_output)

    # Test case 4: Test with a JSON file containing an array of nested objects
    # NOTE(review): same flattening assumption as test case 2.
    test_json = {"data.json": {"content": '[{"name": "John", "age": 30, "address": {"city": "New York", "state": "NY"}}, {"name": "Jane", "age": 25, "address": {"city": "San Francisco", "state": "CA"}}]'}}
    expected_output = pd.DataFrame({"name": ["John", "Jane"], "age": [30, 25], "address_city": ["New York", "San Francisco"], "address_state": ["NY", "CA"]})
    assert file_upload_json_to_df(test_json).equals(expected_output)

    # Test case 5: Test with a JSON file containing an array of objects with different keys
    # NOTE(review): the expectation spells missing values as None; confirm
    # that DataFrame.equals treats them like the missing-value markers pandas
    # produces for absent keys.
    test_json = {"data.json": {"content": '[{"name": "John", "age": 30}, {"name": "Jane", "city": "San Francisco"}]'}}
    expected_output = pd.DataFrame({"name": ["John", "Jane"], "age": [30, None], "city": [None, "San Francisco"]})
    assert file_upload_json_to_df(test_json).equals(expected_output)

test_file_upload_json_to_df()

`load_json_as_df`

In [None]:
#| export
def load_json_as_df(fpath):
    """Load a chat-log JSON file into a DataFrame, reporting problems as data.

    The file must hold a list of records that each contain the keys
    "timestamp", "author" and "message". A dictionary with exactly one key is
    unwrapped to that key's value; any other dictionary is treated as a
    one-element list.

    Args:
        fpath: path of the file to read.

    Returns:
        ``None`` when *fpath* does not end in '.json' (note: a bare None, not a
        tuple). Otherwise a ``(df_out, out_error)`` pair:
        * ``df_out`` is the normalized DataFrame with an added 'filename'
          column, or None when the file was rejected;
        * ``out_error`` is None on success, or ``[fpath, message]`` describing
          why the file was rejected.
        Exceptions raised while reading or parsing are caught, printed and
        returned through ``out_error``.
    """
    # check if file is .json
    if not fpath.endswith('.json'):
        return None

    keys = ["timestamp", "author", "message"]

    df_out = None
    out_error = None

    try:
        # Read as UTF-8 regardless of the platform default; utf-8-sig also
        # tolerates a leading byte-order mark, which json.loads rejects.
        with open(fpath, "r", encoding="utf-8-sig") as f:
            json_data = f.read()

        # strict=False permits control characters inside JSON strings
        data = json.loads(json_data, strict=False)

        # Quick check to see if we can fix common errors in json
        # 1. JSON responses wrapped in enclosing dictionary
        if isinstance(data, dict):
            if len(data) == 1:
                data = next(iter(data.values()))
            else:
                data = [data]  # convert to list otherwise

        # We only operate on lists of dictionaries
        if isinstance(data, list):
            data = clean_keys(data)  # clean keys to make sure there are no unnecessary newlines

            # First record that lacks a required key, if any
            offending = next((d for d in data if not all(k in d for k in keys)), None)

            if offending is None:
                df_out = pd.json_normalize(data)
                if len(df_out) <= 1:
                    out_error = [fpath, "Warning: JSON keys correct, but something wrong with the overall structure of the JSON when converting to dataframe. The dataframe only has one row. Skipping."]
                    df_out = None
            else:
                # Report the keys of the record that actually failed the check
                out_error = [fpath, "Error: JSON Keys are incorrect. Found keys: " + str(list(offending.keys()))]
        else:
            out_error = [fpath, "Error: Something is wrong with the structure of the JSON."]

    except Exception as e:
        # Deliberate best-effort: one bad file must not stop a batch load
        print(f"Error processing file {fpath}: {str(e)}")
        out_error = [fpath, "Fatal System Error: "+str(e)]

    if df_out is not None:
        df_out['filename'] = fpath

    return df_out, out_error

Tests for `load_json_as_df`

In [None]:
# NOTE(review): this test cell defines load_json_as_df again; a function of the
# same name is also defined in the exported cell earlier in the notebook. Once
# this cell runs, this definition is the one bound to the name in the notebook
# namespace, so the test below exercises this copy. Consider removing it so the
# test runs against the exported function.
def load_json_as_df(fpath):
    """Load a chat-log JSON file into a DataFrame.

    Returns None when *fpath* does not end in '.json'; otherwise returns a
    (df_out, out_error) pair where df_out is a DataFrame or None and out_error
    is None or a [fpath, message] list.
    """
    # check if file is .json
    if not fpath.endswith('.json'):
        return None

    # Keys every record must contain
    keys = ["timestamp", "author", "message"]

    df_out = None
    out_error = None

    try:
        # Read JSON file
        with open(fpath, "r") as f:
            json_data = f.read()

        # Load JSON data
        data = json.loads(json_data, strict=False)

        # Quick check to see if we can fix common errors in json
        # 1. JSON responses wrapped in enclosing dictionary
        if isinstance(data, dict):
          if len(data.keys()) == 1:
            data = data[list(data.keys())[0]]
          else:
            data = [data] #convert to list otherwise

        # We only operate on lists of dictionaries
        if isinstance(data, list):
          data = clean_keys(data) #clean keys to make sure there are no unnecessary newlines

          if all(all(k in d for k in keys) for d in data):
              df_out = pd.json_normalize(data)
              # Results with zero or one row are rejected
              if len(df_out) <=1:
                out_error = [fpath, "Warning: JSON keys correct, but something wrong with the overall structure of the JSON when converting to dataframe. The dataframe only has one row. Skipping."]
                df_out = None
          else:
              # The message lists the keys of the first record only
              out_error = [fpath, "Error: JSON Keys are incorrect. Found keys: " + str(list(data[0].keys()))]
        else:
            out_error = [fpath, "Error: Something is wrong with the structure of the JSON."]

    except Exception as e:
        # Any failure is printed and returned through out_error, not raised
        print(f"Error processing file {fpath}: {str(e)}")
        out_error = [fpath, "Fatal System Error: "+str(e)]

    if df_out is not None:
        df_out['filename'] = fpath

    return df_out, out_error

def test_load_json_as_df():
    """Round-trip a two-message chat log through load_json_as_df.

    Writes a temporary JSON file into the working directory, loads it, and
    checks the resulting DataFrame and the absence of an error entry. The
    file is removed even when an assertion fails.
    """
    # Create a temporary JSON file
    data = [
        {"timestamp": "2022-01-01 00:00:00", "author": "Alice", "message": "Hello"},
        {"timestamp": "2022-01-01 00:01:00", "author": "Bob", "message": "Hi there"},
    ]
    fpath = "test.json"
    with open(fpath, "w") as f:
        f.write(json.dumps(data))

    try:
        # Call the function
        df, error = load_json_as_df(fpath)

        # Check the output
        assert error is None
        assert isinstance(df, pd.DataFrame)
        assert len(df) == 2
        assert set(df.columns) == set(["timestamp", "author", "message", "filename"])
        assert df.iloc[0]["timestamp"] == "2022-01-01 00:00:00"
        assert df.iloc[1]["author"] == "Bob"
        assert df.iloc[0]["filename"] == fpath
    finally:
        # Clean up, whether or not the checks passed
        os.remove(fpath)

test_load_json_as_df()

`pretty_print`

In [None]:
#| export
def pretty_print(df):
    """Display *df* as an HTML table, turning escaped newlines into line breaks.

    Every literal two-character sequence backslash + "n" in the generated
    markup is replaced with a <br> tag before rendering. Returns whatever
    ``display`` returns.
    """
    table_markup = df.to_html()
    markup_with_breaks = table_markup.replace("\\n", "<br>")
    return display(HTML(markup_with_breaks))

Test for `pretty_print`

In [None]:
def test_pretty_print():
    """Smoke test: render a small grade table through pretty_print.

    NOTE(review): the only check is that the return value differs from the
    empty string; confirm whether a stronger check on the output is wanted.
    """
    roster = pd.DataFrame({
        'name': ['Alice', 'Bob', 'Charlie'],
        'grade': [80, 90, 85],
    })

    rendered = pretty_print(roster)
    assert rendered != '', 'Output string is empty'

test_pretty_print()

`save_as_csv`

In [None]:
#| export
def save_as_csv(df, file_name):
  """Write *df* to *file_name* in CSV format, leaving out the index column."""
  csv_options = {'index': False}
  df.to_csv(file_name, **csv_options)

Tests for `save_as_csv`

In [None]:
def test_save_as_csv():
    """Write a small grade table with save_as_csv and confirm the file appears."""
    file_name = 'test.csv'
    roster = pd.DataFrame({
        'name': ['Alice', 'Bob', 'Charlie'],
        'grade': [80, 90, 85],
    })

    save_as_csv(roster, file_name)

    file_was_written = os.path.exists(file_name)
    assert file_was_written, 'File does not exist'

    # Remove the file created by this test
    os.remove(file_name)

test_save_as_csv()

`show_json_loading_errors`

In [None]:
#| export
def show_json_loading_errors(err_list):
  """Print the files that failed to load, or a note that none did.

  Args:
    err_list: collection of error entries. An entry may be a plain string or
      a sequence such as the ``[fpath, message]`` pairs produced by
      load_json_as_df; sequence entries are shown as their parts joined
      with ': '.
  """
  if err_list:
    # str.join only accepts strings, so pair-style entries are flattened first
    lines = [
        entry if isinstance(entry, str) else ': '.join(str(part) for part in entry)
        for entry in err_list
    ]
    print("The following files have the following errors upon loading and will NOT be processed:", '\n'.join(lines))
  else:
    print("No errors found in uploaded zip JSON files.")

Test for `show_json_loading_errors`

In [None]:
import sys

def test_show_json_loading_errors():
    """Capture what show_json_loading_errors prints for two failing files.

    sys.stdout is swapped for an in-memory buffer during the call and then
    put back to the stream that was active before, even if the call raises.
    Restoring to sys.__stdout__ instead would discard any replacement stream
    the host environment had installed.
    """
    err_list = ['file1.json', 'file2.json']
    expected_output = 'The following files have the following errors upon loading and will NOT be processed: file1.json\nfile2.json\n'

    captured_output = io.StringIO()
    previous_stdout = sys.stdout
    sys.stdout = captured_output
    try:
        show_json_loading_errors(err_list)
    finally:
        sys.stdout = previous_stdout

    assert captured_output.getvalue() == expected_output, 'Output string is incorrect'

test_show_json_loading_errors()

In [None]:
#| export
# Default prompt text describing the table to build from a chat log. The
# chained assignment binds the same string to both default_ai_assisted_setup
# and output_setup.
default_ai_assisted_setup = output_setup = ("Given the following chat log, create a table with the question number, the question content, answer, "
                  "whether or not the student answered correctly on the first try, and the number of attempts it took to get the right answer. ")

# Default prompt text for scoring: a point is granted only for answers that
# were correct on the first attempt.
default_ai_assisted_grading_instructions = ("Then, calculate the quiz grade from the total number of assessment questions. "
                  "Importantly, a point should only be granted if an answer was correct on the very first attempt. "
                  "If an answer was not correct on the first attempt, even if it was correct in subsequent attempts, no point should be awarded for that question. ")

# No output-setup text is defined for the Bloom's-taxonomy variant.
bloom_assisted_output_setup = None

# Prompt text asking for a 1-6 rating on Bloom's taxonomy.
# NOTE(review): "level or engagement" reads like a typo for "level of
# engagement"; confirm before editing, since this string is runtime text.
bloom_assistedgrading_instructions = """\nEvaluate the student's overall level or engagement and knowledge, based on bloom's taxonomy using their responses.
Bloom's taxonomy is rated on a 1-6 point system, with 1 being remember (recall facts and basic concepts), 2 being understand (explain ideas or concepts),
3 being apply (use information in new situations), 4 being analyze (draw connections among ideas), 5 being evaluate (justify a stand or decision),
and 6 being create (produce new or original work). Assign the interaction a score from 1-6, where 1 = remember, 2 = understand, 3 = apply, 4 = analyze,
5 = evaluate, and 6 = create."""
