In [76]:
import os
import logging
import pandas as pd
from pathlib import Path
import yaml

In [77]:
# Set working directory - Not required if using Jupyter outside of VScode
workdir = os.environ["workdir"]
os.chdir(workdir)
os.getcwd()

'/Users/amiralkateb/Documents/GitRepos/audenshaw_exam_validation'

In [78]:
logging.basicConfig(
    level=logging.INFO,
    format='%(process)d-%(levelname)s-%(message)s',
    datefmt="%Y-%m-%d %H:%M:%S",
    )


In [79]:
# helper function
def extract_level_structure(markstring):
  
    levels_dict = {}
    data = yaml.safe_load(markstring)

    for level, content in data.items():
        try:
            marks = content['marks_awarded']
            levels_dict[int(level[-1])] = sorted({int(marks['minimum']), int(marks['maximum'])})
        except:
            print("yaml error")
    #if there is no 0, add it in
    if min(levels_dict.get(min(levels_dict.keys()))) >0:
        levels_dict[0] = [0]
        levels_dict = {key: levels_dict[key] for key in sorted(levels_dict)}

    if len(levels_dict.keys()) > 0:
        return levels_dict
    else:
        return None


In [80]:
data_dir = Path(f"{workdir}/data")

question_info = pd.read_csv(data_dir / "questions.csv")
mark_scheme_table = pd.read_csv(data_dir / "mark_scheme.csv")
student_answers = pd.read_csv(data_dir / "answers.csv")


In [81]:
question_answers_merged = pd.merge(student_answers, question_info, on = ["question_id", "subject_id"], how = "left")

In [82]:
# Join Mark Scheme Text
question_answers_ms_merged = pd.merge(question_answers_merged, mark_scheme_table[["question_id", "subject_id", "structured_mark_scheme_text"]], on = ["question_id", "subject_id"], how = "left")

# Rename Columns
question_answers_ms_merged = question_answers_ms_merged.rename(columns = {"structured_mark_scheme_text": "mark_scheme_text"})

In [83]:
columns = ["subject_id", "question_id", "question_type", "student_id","question_text", "mark_scheme_text", "context", "answer_text", "awarded_marks", "total_marks", "answer_id", "linked_answer_id", "topic_id", "answer_scanned_image"]
student_answers_pivoted_merged = question_answers_ms_merged[columns]
student_answers_pivoted_merged = student_answers_pivoted_merged.sort_values(by=["student_id", "question_id"]).reset_index(drop = True)

In [84]:
# Check for Duplicates and ensure tables are the same size
assert student_answers.drop_duplicates().shape[0] == student_answers_pivoted_merged.drop_duplicates().shape[0]

In [85]:

valid_types = [
    'bs_discuss', 'bs_analyse', 'bs_justify', 'bs_evaluate', 
    'hs_explain', 'hs_analyse', 'hs_judgement', 'hs_spag'
]
student_answers_pivoted_merged['level_structure'] = student_answers_pivoted_merged.apply(lambda row: extract_level_structure(row['mark_scheme_text']) if row['question_type'] in valid_types else None, axis =1)
        




#student_answers_pivoted_merged.loc[0]['mark_scheme_text']

student_answers_pivoted_merged['level_structure'].where(student_answers_pivoted_merged['question_type'] == 'bs_mcq')

#

yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error
yaml error


0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
      ... 
138    NaN
139    NaN
140    NaN
141    NaN
142    NaN
Name: level_structure, Length: 143, dtype: object

In [86]:
# Save questions and answers
print(student_answers_pivoted_merged.loc[2]['level_structure'])
savedir = Path(workdir)  / "validation_results" / "processed_data"
Path(savedir).mkdir(parents=True, exist_ok=True)

student_answers_pivoted_merged.to_csv(savedir / "student_answers_augmented.csv", index=False)

{4: [7, 8], 3: [5, 6], 2: [3, 4], 1: [1, 2], 0: [0]}


In [87]:
index = 129
#print(student_answers_pivoted_merged.loc[index]['mark_scheme_text'])

out = student_answers_pivoted_merged[student_answers_pivoted_merged['question_id'] == 7.4]
out = out[out['student_id'] == 5002]
mark = out.loc[40]['mark_scheme_text']

print(mark)

print(extract_level_structure(mark))

Level_1:
  description: 
    - Limited application of knowledge and understanding of business concepts and issues to the business context (AO2).
  marks_awarded:
    maximum: 3
    minimum: 1
  criteria:
    - Attempts to deconstruct business information and/or issues, finding limited connections between points (AO3a).
    - Makes a judgement, providing a simple justification based on limited evaluation of business information and issues relevant to the choice made (AO3b).
  progression:
    - Students can progress to this level by demonstrating a basic understanding of business concepts and making simple connections between points.
  example_answer: 
    - Ripple provides green energy through its wind farm in South Wales. Therefore, they would have to pay less tax to the government on any profits made from production and supply of energy. (AO2).

Level_2:
  description: 
    - Sound application of knowledge and understanding of business concepts and issues to the business context alth