In [6]:
from langchain_community.chat_models import ChatOllama
from langchain.prompts import PromptTemplate
import json
from typing import Dict, List
import pandas as pd
from PyPDF2 import PdfReader
import os
from pathlib import Path
import shutil
# Stage a copy of the shared helper module next to this notebook so that the
# `watsonx_utils` import that follows can be resolved from the working directory.
destination = Path('./watsonx_utils.py')
source = Path('../classroom/watsonx_utils.py')
shutil.copy(src=source, dst=destination)
from watsonx_utils import get_sections

In [7]:
# Parameter sets fed to the section generator and validator: grade1 through
# grade10, each listed once without and once with a disability accommodation.
# Every row below follows the column order given in _TEST_CASE_FIELDS.
_TEST_CASE_FIELDS = ("Level", "Disability", "Subject", "NumOfSections")
_TEST_CASE_ROWS = [
    ("grade1", "no", "math", 3),
    ("grade1", "yes", "arabic", 4),
    ("grade2", "no", "science", 3),
    ("grade2", "yes", "math", 5),
    ("grade3", "no", "arabic", 4),
    ("grade3", "yes", "science", 3),
    ("grade4", "no", "math", 5),
    ("grade4", "yes", "arabic", 4),
    ("grade5", "no", "science", 3),
    ("grade5", "yes", "math", 5),
    ("grade6", "no", "arabic", 4),
    ("grade6", "yes", "science", 3),
    ("grade7", "no", "math", 5),
    ("grade7", "yes", "arabic", 4),
    ("grade8", "no", "science", 3),
    ("grade8", "yes", "math", 5),
    ("grade9", "no", "arabic", 4),
    ("grade9", "yes", "science", 3),
    ("grade10", "no", "math", 5),
    ("grade10", "yes", "arabic", 4),
]

# Expand each row into the dict shape the rest of the notebook consumes.
TEST_CASES = [dict(zip(_TEST_CASE_FIELDS, row)) for row in _TEST_CASE_ROWS]


In [None]:
class CurriculumValidator:
    """Judge generated curriculum sections with an LLM.

    Each topic is formatted into a prompt together with the requested grade
    level, subject and disability flag, sent to the chat model, and the
    model's JSON reply is interpreted as a valid/invalid verdict.
    """

    def __init__(self):
        """Initialize the validator with langchain components"""
        self.llm = ChatOllama(model='llama3', format="json", temperature=0)

        # Prompt for content validation
        self.validate_prompt = PromptTemplate(
            template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
            You are an expert in educational content validation.
            Evaluate if the following topic and description are appropriate for:
            Grade Level: {level}
            Subject: {subject}
            Disability Accommodation Needed: {disability}

            Topic: {topic}
            Description: {description}

            Consider:
            1. Grade Level Appropriateness
            2. Subject Relevance
            3. Disability Accommodation (if needed)
            4. Content Clarity and Quality

            Return only a JSON object with this structure:
            {{"is_valid": true/false}}
            \n<|eot_id|><|start_header_id|>assistant<|end_header_id|>
            """,
            input_variables=["level", "subject", "disability", "topic", "description"]
        )

    @staticmethod
    def _normalize_verdict(parsed) -> Dict:
        """Coerce the model's parsed JSON reply into a dict with a boolean ``is_valid``."""
        if not isinstance(parsed, dict):
            return {
                "is_valid": False,
                "reason": f"Validation error: expected a JSON object, got {type(parsed).__name__}"
            }

        verdict = dict(parsed)
        flag = verdict.get("is_valid")
        if isinstance(flag, str):
            # A quoted boolean must be parsed explicitly: the non-empty string
            # "false" is truthy and would otherwise be counted as valid.
            lowered = flag.strip().lower()
            if lowered in ("true", "false"):
                flag = lowered == "true"

        if not isinstance(flag, bool):
            verdict["is_valid"] = False
            verdict.setdefault(
                "reason",
                "Validation error: response has no boolean 'is_valid' field"
            )
            return verdict

        verdict["is_valid"] = flag
        return verdict

    def validate_topic(self, topic_data: Dict, params: Dict) -> Dict:
        """
        Validate a single topic's relevancy and appropriateness

        Args:
            topic_data (dict): Contains Topic and Description
            params (dict): Contains Level, Subject, and Disability

        Returns:
            dict: Always contains a boolean ``is_valid``. Any other keys the
            model returned are passed through. When the topic could not be
            validated (missing keys, model failure, unparsable or malformed
            reply) ``is_valid`` is False and ``reason`` describes the problem.
        """
        try:
            prompt = self.validate_prompt.format(
                level=params['Level'],
                subject=params['Subject'],
                disability=params['Disability'],
                topic=topic_data['Topic'],
                description=topic_data['Description']
            )
            response = self.llm.predict(prompt)
            return self._normalize_verdict(json.loads(response))
        except Exception as e:
            # Best effort: one bad topic must not abort the whole run, so any
            # failure is reported as an invalid verdict instead of raising.
            return {
                "is_valid": False,
                "reason": f"Validation error: {str(e)}"
            }

    def validate_curriculum(self, sections_data: Dict, params: Dict) -> Dict:
        """
        Validate the entire curriculum sections

        Args:
            sections_data (dict): The data returned by get_sections; expected
                to hold the list of topics under the "data" key
            params (dict): Original parameters used for generation

        Returns:
            dict: Validation results including overall and per-topic validation.
            On a format problem or a section-count mismatch the dict carries an
            ``error`` message and no per-topic validation is attempted.
        """
        results = {
            "params": params,
            "num_sections_match": False,
            "sections_validation": [],
            "overall_valid": False
        }

        # Check if data key exists (also covers a None / non-dict payload)
        if not isinstance(sections_data, dict) or "data" not in sections_data:
            results["error"] = "Invalid format: 'data' key missing"
            return results

        sections = sections_data["data"]
        if not isinstance(sections, (list, tuple)):
            results["error"] = "Invalid format: 'data' must be a list of topics"
            return results

        # Validate number of sections
        actual_sections = len(sections)
        expected_sections = params["NumOfSections"]
        results["num_sections_match"] = actual_sections == expected_sections

        if not results["num_sections_match"]:
            results["error"] = f"Section count mismatch. Expected: {expected_sections}, Got: {actual_sections}"
            return results

        # Validate each topic
        valid_topics = 0
        for topic_data in sections:
            validation_result = self.validate_topic(topic_data, params)
            # A malformed topic has already been reported as invalid by
            # validate_topic; do not crash here while looking up its title.
            topic_name = topic_data.get("Topic") if isinstance(topic_data, dict) else None
            results["sections_validation"].append({
                "topic": topic_name,
                "validation": validation_result
            })
            if validation_result["is_valid"]:
                valid_topics += 1

        # Set overall validation result
        results["overall_valid"] = (valid_topics == expected_sections)
        results["valid_topics_count"] = valid_topics

        return results



In [None]:

# Run every parameter set through section generation and validation,
# collecting one result record per case (including cases that raised).
test_cases = TEST_CASES
validator = CurriculumValidator()
validation_results = []

for i, params in enumerate(test_cases, start=1):
    try:
        print(f"\nValidating test case {i}/{len(test_cases)}")
        print(f"Parameters: {params}")

        # Generate the sections, then have the validator judge them.
        sections = get_sections(params)
        result = validator.validate_curriculum(sections, params)
        validation_results.append(result)

        # Per-case summary
        print(f"Sections count match: {result['num_sections_match']}")
        print(f"Valid topics: {result.get('valid_topics_count', 0)}/{params['NumOfSections']}")
        print(f"Overall valid: {result['overall_valid']}")

    except Exception as e:
        # Keep going: a failing case is recorded as invalid with its error text.
        print(f"Error in test case {i}: {str(e)}")
        failure_record = {
            "params": params,
            "error": str(e),
            "overall_valid": False
        }
        validation_results.append(failure_record)

# Aggregate summary across all cases
successful_cases = len([r for r in validation_results if r.get("overall_valid", False)])
print(f"\nValidation Summary:")
print(f"Successful cases: {successful_cases}/{len(test_cases)}")
print(f"Failed cases: {len(test_cases) - successful_cases}/{len(test_cases)}")

# Display the full list of result records as the cell output
validation_results