In [1]:
import os
# Switch the working directory to the project root; later cells import `helpers.*`
# and read "../docs/..." relative to it (presumably the reason for the chdir — confirm).
# The location can be overridden with the ZAVMO_PROJECT_DIR environment variable so the
# notebook is not tied to one machine; the original path remains the default.
os.chdir(os.environ.get('ZAVMO_PROJECT_DIR', '/Users/adityachhabra/Github/zavmo/zavmo-api/zavmo'))

In [2]:
from pydantic import BaseModel, Field
from collections.abc import Callable
from typing import List, Dict, Union, Optional, Any, ForwardRef, Literal
import os
from dotenv import load_dotenv
from openai import OpenAI
from openai.types.chat import ChatCompletionMessage
from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall
import logging
from collections import defaultdict
import pandas as pd
from tqdm.auto import tqdm
import json
# Load variables from a .env file into the process environment; a later cell reads
# OPENAI_API_KEY via os.getenv().
load_dotenv()

True

In [4]:
from helpers.chat import filter_history
from pydantic import BaseModel
import logging
from helpers._types import (
    Agent,
    function_to_json,
    Result,
    Response, 
    StrictTool,
    PermissiveTool
)
from typing import List, Dict, Any
from collections import defaultdict

In [5]:
# System prompt for mark-scheme generation. It asks the model to (1) define criteria for
# each Bloom's Taxonomy level, (2) design one task per level, and (3) produce sample
# responses across the Fail / Pass / Merit / Distinction bands.
# NOTE(review): the name `sys` would shadow the standard-library `sys` module if that
# module were imported in this notebook; the name is kept because a later cell reads it.
sys="""
Based on the unit text provided, generate a comprehensive mark scheme and corresponding learning materials using Bloom's Taxonomy. 
Follow the defintion for every item in the mark scheme for each level of Bloom's Taxonomy and gain clarity on the criteria, task and response generation:

1: Develop Bloom's Taxonomy Criteria
	1.	For Each Level of Bloom's Taxonomy (Remembering, Understanding, Applying, Analyzing, Evaluating, and Creating):
	•	Define clear criteria for evaluating learners' work at each level.
	•	Use key verbs (e.g., "list," "explain," "demonstrate," "compare," "evaluate," "design") to frame expectations.
	•	Clearly articulate how learners can achieve each level, aligning criteria to the text provided.

2: Design Tasks for Learners
	1.	Create one task for each level of Bloom's Taxonomy that reflects the criteria defined in Step 1.
	•	Ensure the tasks are specific, measurable, and tailored to the unit text.
	•	Use varied formats (e.g., multiple-choice questions for Remembering, essays for Evaluating, or project-based tasks for Creating).

3: Generate Sample Responses with Grading Standards
	1.	Provide responses for each task which should set OFQUAL standards.
	2.	Align responses to the following performance bands with each item having this structure:
        level: for which the response scheme is set.
        response: variation of responses that should set OFQUAL standards.
		•	Fail (39% or less): Responses demonstrating minimal understanding or effort.
		•	Pass (40%-59%): Responses meeting basic expectations but with limited depth.
		•	Merit (60%-79%): Responses showing solid understanding, detail, and application.
		•	Distinction (80%-100%): Responses exemplifying comprehensive understanding, insight, and creativity.
"""

# agent = Agent(
#     name="Generate Mark Scheme",
#     functions=[GenerateCriteriaOnAllLevels, GenerateBenchmarkingResponseOnAllLevels],
#     model="gpt-4o-mini",
#     instructions=sys
# )

In [6]:
# Chat message that installs the mark-scheme prompt under the "system" role.
system_message = dict(role='system', content=sys)
# Hand-written sample of one Ofqual unit (title, aim, level, credit/time values, learning
# outcomes and assessment criteria) in plain-text form.
# NOTE(review): not referenced by any other cell visible here; presumably intended as a
# manual input for generate_markscheme(unit_text) — confirm.
ofqual_unitwise_text = """UNIT 1:
- Unit Title/Heading: Knowledge of the Service and Repair of Electrically Propelled Buses and Coaches
- Unit Aim/Purpose: This unit is for people who work on or near electric and electric/hybrid buses and coaches. It includes essential knowledge of the hazards associated with electric and electric/hybrid buses and coaches and the precautions to follow to avoid these. It enables the learner to understand how to safely isolate and reinstate the high voltage system and to remove and replace high voltage components.
- Level: 2
- Reference Number: L/618/5652
- Credit Value: 2
- Total Qualification Time (TQT): 19
- Guided Learning Hours: 16
- Learning Outcomes:
  1. Understand the risks and hazards associated with electric and electric/hybrid buses and coaches
  2. Know and understand the different types of electric and electric/hybrid buses and coaches, associated technology, components and operating principles
  3. Know how to prepare electric and electric/hybrid buses and coaches when carrying out routine service and repair procedures
  4. Know how to work safely on electric and electric/hybrid buses and coaches
- Assessment Criteria:
  1.1 Describe the health and safety legislation and workplace procedures relating to working on, near or with electric and electric/hybrid buses and coaches.
  1.2 Describe the dangers relating to working with high voltages, electrocution, battery electrolyte gel, and hazards associated with alternative fuel sources and systems, including hydrogen fuel cells.
  1.3 Describe safety requirements including First Aid, tools and equipment, ventilation, high voltage isolation, dealing with electrolyte gel spillages, environmental protection, risk assessment, and workplace signage.
  1.4 Describe vehicle power systems and their associated safety risks.
  2.1 Identify components that make up the electric and electric/hybrid systems.
  2.2 Describe basic operating principles.
  2.3 Describe the construction and function of battery types.
  2.4 Describe the construction and function of component parts.
  2.5 Describe how to store parts and components.
  3.1 Describe the preparation of the vehicle prior to conducting service/repair of the vehicle.
  4.1 Describe safe working methods for working on electric and electric/hybrid buses and coaches.

"""

## Finalized approach for generating the mark scheme

In [8]:
# Structured-output schema for ONE Bloom's Taxonomy level: the level label, its
# assessment criteria, the action verbs, the expectations, and a single task.
# NOTE(review): documented with `#` comments rather than a class docstring because, if
# StrictTool is a pydantic model, a docstring would be emitted as the schema description
# that is sent to the model — confirm before adding one.
# NOTE(review): the level labels below use the "Remember/Understand/..." spelling while
# the system prompt uses "Remembering/Understanding/..."; the Literal is what constrains
# the parsed value.
class GenerateCriteria(StrictTool):
    blooms_taxonomy_level: Literal["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"] = Field(description="The level of Bloom's Taxonomy.")
    criteria: List[str] = Field(description="Generate very challenging criteria for the level of Bloom's Taxonomy based on the ofqual unit provided.")
    key_verbs: List[str] = Field(description="Action verbs that align with Bloom's Taxonomy level to specify the expected learning outcomes (e.g., 'define,' 'explain,' 'apply,' 'analyze,' 'evaluate,' 'create').")
    expectations: List[str] = Field(description="Expectations to meet the criteria.")
    task: str = Field(description="A task designed around the level of Bloom's Taxonomy based on the ofqual unit and criteria.")

# Structured-output schema holding one GenerateCriteria entry per Bloom's Taxonomy level.
# (Kept as `#` comments: a class docstring could end up in the schema description sent
# to the model if StrictTool is a pydantic model — confirm.)
class GenerateCriteriaOnAllLevels(StrictTool):
    task_and_criteria: List[GenerateCriteria] = Field(description="Generate criteria for all levels of Bloom's Taxonomy based on the ofqual unit provided.")

    def execute(self, context: Dict):
        # Summarise every task with its criteria as one text block wrapped in a Result.
        # `context` is accepted but not read here.
        heading = "Tasks and corresponding criteria on all levels of Bloom's Taxonomy: \n\n"
        closing = "\n\nNext step is to generate a benchmarking response for each task."
        sections = []
        for entry in self.task_and_criteria:
            # entry.criteria is a list, so it is rendered with its list repr.
            sections.append(f"{entry.task}:\n\n{entry.criteria}")
        return Result(value=heading + "\n".join(sections) + closing)

# One set of four sample answers for a task, one per grade band.
# The `pass_` field carries a trailing underscore because `pass` is a Python keyword.
# The existing docstring is left untouched: it may be emitted as the schema description
# if StrictTool is a pydantic model — confirm before rewording it.
class pass_fail_merit_distinction(StrictTool):
    """
    This class is used to generate benchmarking responses for the pass, fail, merit, and distinction for the markscheme item.
    """
    fail: str = Field(description="Response demonstrating minimal understanding or effort, showing insufficient grasp of core concepts")
    pass_: str = Field(description="Response meeting basic expectations with limited depth, showing fundamental understanding of core concepts")
    merit: str = Field(description="Response showing solid understanding, detail, and application, demonstrating thorough comprehension and ability to apply concepts")
    distinction: str = Field(description="Response exemplifying comprehensive understanding, insight, and creativity, showing exceptional mastery and innovative application of concepts")

# Structured-output schema for the benchmarking step: a list of grade-band answer sets.
# NOTE(review): generate_markscheme() only consumes element [0] of `response`, so any
# further entries the model returns are discarded.
class GenerateBenchmarkingResponse(StrictTool):
    response: List[pass_fail_merit_distinction] = Field(description="Generate benchmarking responses for the task to have variations in response considering criteria and expectations provided.")
    # Wraps the parsed list unchanged in a Result; `context` is accepted but not read.
    def execute(self, context: Dict):
        return Result(value=self.response)

def get_structured_output(func, messages=None, model="gpt-4o-mini"):
    """Request a chat completion parsed into the schema class `func`.

    Args:
        func: Schema class passed as `response_format` to the parse endpoint.
        messages: Chat messages to send. Defaults to an empty list; `None` is used
            as the sentinel so that no list object is shared between calls.
        model: Name of the chat model to use.

    Returns:
        The `parsed` attribute of the first choice's message.
        NOTE(review): per the OpenAI SDK this is presumably `None` when the model
        refuses; callers in this notebook do not check for that — confirm.
    """
    if messages is None:
        messages = []

    # A new client is built on every call, reading the key from the environment.
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    completion = client.beta.chat.completions.parse(
        model=model,
        messages=messages,
        response_format=func,
    )

    return completion.choices[0].message.parsed
    
def generate_markscheme(unit_text: str):
    """Build a mark scheme for one Ofqual unit.

    First asks the model for criteria and a task per Bloom's Taxonomy level, then makes
    one further request per level for benchmarking responses.

    Args:
        unit_text: Plain-text description of the unit, embedded in the user prompt.

    Returns:
        A list of dicts, one per returned level, with the keys
        'bloom_taxonomy_level', 'criteria', 'key_verbs', 'expectations', 'task'
        and 'benchmarking_responses'.
    """
    criteria_prompt = [
        system_message,
        {"role": "user", "content": f"Here is the Ofqual(Office of Qualifications and Examinations Regulation) unitwise text: {unit_text}"},
    ]
    criteria_by_level = get_structured_output(GenerateCriteriaOnAllLevels, criteria_prompt)

    marks_scheme = []
    for level_item in criteria_by_level.task_and_criteria:
        entry = {
            'bloom_taxonomy_level': level_item.blooms_taxonomy_level,
            'criteria': level_item.criteria,
            'key_verbs': level_item.key_verbs,
            'expectations': level_item.expectations,
            'task': level_item.task,
        }

        benchmark_prompt = [
            system_message,
            {"role": "user", "content": f"Generate benchmarking responses for the task based on OFQUAL standards: {level_item.task},\n\nCriteria: {level_item.criteria},\n\nExpectations: {level_item.expectations}"},
        ]
        benchmark = get_structured_output(GenerateBenchmarkingResponse, benchmark_prompt)

        # Only the first band set is kept. It is materialised by iterating the object
        # itself (presumably yielding (field_name, value) pairs if it is a pydantic
        # model — confirm).
        entry['benchmarking_responses'] = list(benchmark.response[0])
        marks_scheme.append(entry)

    return marks_scheme


In [19]:
# Load the Ofqual units table; the path is resolved relative to the current working
# directory.
df = pd.read_csv("../docs/rgcn/ofqual_units.csv")
print(df.shape)
# NOTE(review): copies the first two rows (without the index) to the system clipboard.
# This is a side effect that needs a clipboard backend; presumably left over from
# manual inspection — confirm whether it is still wanted.
df.head(2).to_clipboard(index=False)

(28444, 15)


In [25]:
# List the column names.
# NOTE(review): the saved output shows both 'awarding_organisation' and
# 'awarding_organization' — two spellings of what looks like the same field.
df.columns.tolist()

['ofqual_id',
 'overview',
 'unit_id',
 'unit_title',
 'unit_description',
 'unit_learning_outcomes',
 'qualification_type',
 'qualification_level',
 'assessment_methods',
 'sector_subject_area',
 'awarding_organisation',
 'total_credits',
 'guided_learning_hours',
 'total_qualification_time',
 'awarding_organization']

In [24]:
# Show rows whose unit_id repeats an earlier row (duplicated() flags every occurrence
# after the first by default), ordered by unit_id.
# NOTE(review): the saved output has an 'Unnamed: 0' column that the column listing
# above lacks, so it was presumably captured from a different state of `df`.
df[df.duplicated(['unit_id'])].sort_values('unit_id')

Unnamed: 0,ofqual_id,overview,unit_id,unit_title,unit_description,unit_learning_outcomes,qualification_type,qualification_level,assessment_methods,sector_subject_area,awarding_organisation,total_credits,guided_learning_hours,total_qualification_time,awarding_organization
16820,603/4162/2,City & Guilds Level 1-3 Award Certificate in C...,1,Computer Aided Design Using 3D Computer Aided ...,Develops ability to output a 3D model drawing ...,1. Be able to output a 3D model drawing layout...,Occupational Qualification,Level 2,Portfolio of Evidence,Building and construction,/organisations/RN5334,,147.0,440.0,ProQual Awarding Body
26073,603/4803/3,This qualification enables Learners to gain co...,1,Mathematics Assessment,This component assesses all aspects of knowled...,1. Demonstrate the ability to apply mathematic...,Functional Skills,Level 1,E-assessment,Foundations for learning and life,/organisations/RN5136,,55.0,60.0,Future (Awards and Qualifications) Ltd
13801,610/4174/9,The TQUK Level 2 Adult Social Care Certificate...,1,Understand own role,Comprehend the responsibilities and standards ...,1. Identify standards and codes of conduct.\n ...,Occupational Qualification,Level 2,"Coursework, E-assessment, Observation, Portfol...",Health and social care,/organisations/RN5355,35.0,318.0,350.0,Training Qualifications UK Ltd
12396,600/1986/4,This qualification is aimed at you if you see ...,1,Description of Qualification,This is a Level 2 Certificate in Professional ...,1. Understand the nutrition and menu planning ...,Vocationally-Related Qualification,Level 2,"Multiple Choice Examination, Practical Demonst...",Hospitality and catering,/organisations/RN5217,32.0,213.0,320.0,City and Guilds of London Institute
7301,603/5973/0,This qualification has been designed to provid...,1,Unit 1,An unspecified unit within the framework of th...,1. Identify the learning objectives of this un...,Other Life Skills Qualification,Level 2,Portfolio of Evidence,Foundations for learning and life,/organisations/RN5200,8.0,80.0,80.0,Ascentis
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1371,610/5101/9,The Highfield Level 2 NVQ Diploma in Plant Ope...,Y/651/4109,Conforming to productive working practices in ...,This unit provides learners with the skills an...,1. Communicate with others to establish produc...,Vocationally-Related Qualification,Level 2,Portfolio of Evidence,Building and construction,/organisations/RN5219,75.0,250.0,750.0,Highfield Qualifications
1467,610/5067/2,The Highfield Level 2 NVQ Certificate in Plant...,Y/651/4109,Conforming to productive working practices in ...,This unit provides learners with the skills an...,1. Communicate with others to establish produc...,Vocationally-Related Qualification,Level 2,Portfolio of Evidence,Building and construction,/organisations/RN5219,25.0,84.0,250.0,Highfield Qualifications
1356,610/5100/7,The Highfield Level 2 NVQ Diploma in Plant Ope...,Y/651/4109,Conforming to productive working practices in ...,This unit provides learners with the skills an...,1. Communicate with others to establish produc...,Vocationally-Related Qualification,Level 2,Portfolio of Evidence,Building and construction,/organisations/RN5219,68.0,211.0,680.0,Highfield Qualifications
1282,610/5094/5,The Highfield Level 2 NVQ Diploma in Plant Ope...,Y/651/4109,Conforming to productive working practices in ...,This unit provides learners with the skills an...,1. Communicate with others to establish produc...,Vocationally-Related Qualification,Level 2,Portfolio of Evidence,Building and construction,/organisations/RN5219,56.0,184.0,550.0,Highfield Qualifications


In [18]:
# Per-column non-null counts and dtypes.
# NOTE(review): the saved output reports 24048 rows, which matches the frame AFTER the
# drop_duplicates cell further down; cell order does not match execution order here.
df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Index: 24048 entries, 0 to 28440
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   ofqual_id                 24048 non-null  object 
 1   overview                  24048 non-null  object 
 2   unit_id                   24047 non-null  object 
 3   unit_title                24048 non-null  object 
 4   unit_description          23985 non-null  object 
 5   unit_learning_outcomes    23954 non-null  object 
 6   qualification_type        24048 non-null  object 
 7   qualification_level       24048 non-null  object 
 8   assessment_methods        24048 non-null  object 
 9   sector_subject_area       24048 non-null  object 
 10  awarding_organisation     24048 non-null  object 
 11  total_credits             19441 non-null  float64
 12  guided_learning_hours     24048 non-null  float64
 13  total_qualification_time  24048 non-null  float64
 14  awarding_or

In [14]:
# Keep one row per (unit_id, ofqual_id) pair. `df` is rebound, so the unfiltered frame is
# only recoverable by re-running the read_csv cell.
df = df.drop_duplicates(subset=['unit_id','ofqual_id'])
print(df.shape)

(24048, 15)


In [28]:
def _unit_text(row):
    """Render one row of the units table as a multi-line unit summary string."""
    # Values are interpolated as-is, so missing cells appear as the text "nan".
    return f"Unit Title: {row['unit_title']}\n  - Unit Aim: {row['unit_description']}\n  - Unit id: {row['unit_id']}\n  - Credit Value: {row['total_credits']}\n  - Total Qualification Time (TQT): {row['total_qualification_time']}\n  - Guided Learning Hours (GLH): {row['guided_learning_hours']}\n  - Learning Outcomes:\n    {row['unit_learning_outcomes']}"


# Add a 'text' column holding the rendered summary for every row.
df['text'] = df.apply(_unit_text, axis=1)

In [29]:
# Inspect the generated text for the row with index label 0 (a label lookup on the
# Series, not a positional one).
df['text'][0]

'Unit Title: Alcohol and Drug Misuse Awareness\n  - Unit Aim: This unit gives learners an understanding of the effects and consequences of drug and alcohol misuse.\n  - Unit id: T/504/8484\n  - Credit Value: 3.0\n  - Total Qualification Time (TQT): 30.0\n  - Guided Learning Hours (GLH): 15.0\n  - Learning Outcomes:\n    1. Understand the effects and consequences of alcohol misuse.\r\n  - Describe the physical effects of alcohol misuse\r\n  - Identify social consequences of alcohol misuse'