In [176]:
class EnhancedIterationCoder:
    """Enhanced IterationCoder that handles feedback loops and conversation history"""
    
    def __init__(self, name: str, model):
        self.name = name
        self.model = model
    
    def code_response(self, state: AgentState) -> Dict[str, Any]:
        """Code a response with conversation history support"""
        question_data = state["question_data"]
        is_question_43 = state["is_question_43"]
        conversation_history = state.get("conversation_history", [])
        
        # Prepare system and initial messages
        if is_question_43:
            system_msg = system_prompt_43
            initial_msg = f"{initial_prompt_43}\n\nQuestion: {question_data['Question']}\n\nStudent response: {question_data['Response']}"
        else:
            system_msg = system_prompt
            initial_msg = f"{initial_prompt}\n\nQuestion: {question_data['Question']}\n\nStudent response: {question_data['Response']}"
        
        # Build conversation with history
        messages = [SystemMessage(content=system_msg)]
        
        # Add conversation history if available
        if conversation_history:
            messages.extend(conversation_history)
        else:
            # First iteration - add initial message
            messages.append(HumanMessage(content=initial_msg))
        
        try:
            response = self.model.invoke(messages)
            return {
                "iteration_name": self.name,
                "raw_response": response.content,
                "timestamp": datetime.now().isoformat()
            }
        except Exception as e:
            logger.error(f"Error in {self.name}: {str(e)}")
            return {
                "iteration_name": self.name,
                "raw_response": f"Error: {str(e)}",
                "timestamp": datetime.now().isoformat(),
                "error": True
            }
    
    def code_with_feedback(self, state: AgentState, feedback: str) -> Dict[str, Any]:
        """Code with researcher feedback"""
        question_data = state["question_data"]
        is_question_43 = state["is_question_43"]
        conversation_history = state.get("conversation_history", [])
        
        # Prepare system message
        if is_question_43:
            system_msg = system_prompt_43
        else:
            system_msg = system_prompt
        
        # Build conversation with history and feedback
        messages = [SystemMessage(content=system_msg)]
        messages.extend(conversation_history)
        
        # Add feedback message
        feedback_msg = f"""Based on the researcher's feedback, please recode the response:

                            Researcher Feedback: {feedback}

                            Please provide your coding in the specified output format: {OUTPUT_FORMAT if not is_question_43 else OUTPUT_FORMAT_43}

                            JUST RETURN THE DESIRED OUTPUT, DO NOT ADD ANYTHING ELSE.
                            Return a single valid JSON object ONLY.
                            No markdown, no backticks, no prose."""
        
        messages.append(HumanMessage(content=feedback_msg))
        
        try:
            response = self.model.invoke(messages)
            return {
                "iteration_name": self.name,
                "raw_response": response.content,
                "timestamp": datetime.now().isoformat(),
                "with_feedback": True
            }
        except Exception as e:
            logger.error(f"Error in {self.name} with feedback: {str(e)}")
            return {
                "iteration_name": self.name,
                "raw_response": f"Error: {str(e)}",
                "timestamp": datetime.now().isoformat(),
                "error": True,
                "with_feedback": True
            }


In [182]:
class AgenticCodingSystem:
    """Main agentic coding system using LangGraph"""
    
    def __init__(self, model, log_file: str = "agreement_logs.txt"):
        self.model = model
        self.log_file = log_file
        
        # Initialize agents
        self.orchestrator = OrchestratorAgent(model)
        self.researcher = ResearcherAgent(model)
        self.agreement_calculator = AgreementCalculator()
        
        # Build the graph
        self.graph = self._build_graph()
    
    def _build_graph(self) -> StateGraph:
        """Build the LangGraph workflow"""
        workflow = StateGraph(AgentState)
        
        # Add nodes
        workflow.add_node("initial_iterations", run_initial_iterations_node)
        workflow.add_node("final_decision", make_final_decision_node)
        workflow.add_node("validate", validate_coding_node)
        workflow.add_node("feedback_loop", handle_feedback_loop_node)
        workflow.add_node("log_metrics", log_metrics_node)
        
        # Set entry point
        workflow.set_entry_point("initial_iterations")
        
        # Add edges
        workflow.add_edge("initial_iterations", "log_metrics")
        workflow.add_edge("log_metrics", "final_decision")
        workflow.add_conditional_edges(
            "final_decision",
            should_validate,
            {
                "validate": "validate",
                "end": END
            }
        )
        workflow.add_conditional_edges(
            "validate",
            should_continue_workflow,
            {
                "feedback_loop": "feedback_loop",
                "end": END
            }
        )
        workflow.add_edge("feedback_loop", "log_metrics")
        
        return workflow.compile()
    
    def process_question(self, response_id: str, question_key: str, question_data: Dict[str, Any], 
                        max_iterations: int = 3) -> Dict[str, Any]:
        """Process a single question through the agentic system"""
        
        # Determine if it's question 43
        question_num = question_key.split(" ")[1] if " " in question_key else question_key
        is_question_43 = (question_num == "43")
        
        # Initialize state
        initial_state = AgentState(
            response_id=response_id,
            question_key=question_key,
            question_data=question_data,
            is_question_43=is_question_43,
            iteration_results=[],
            final_codes=None,
            conversation_history=[],
            agreement_metrics={},
            researcher_feedback=None,
            max_iterations=max_iterations,
            current_iteration=0,
            is_complete=False,
            needs_feedback=False,
            orchestrator=self.orchestrator,
            researcher=self.researcher,
            agreement_calculator=self.agreement_calculator,
            log_file=self.log_file
        )
        
        # Run the workflow
        try:
            final_state = self.graph.invoke(initial_state)
            
            # Mark as complete
            final_state["is_complete"] = True
            
            # Final log
            self.agreement_calculator.log_agreement_metrics(final_state, self.log_file)
            
            return {
                "response_id": response_id,
                "question_key": question_key,
                "final_codes": final_state["final_codes"],
                "iteration_results": final_state["iteration_results"],
                "researcher_feedback": final_state["researcher_feedback"],
                "agreement_metrics": final_state["agreement_metrics"],
                "conversation_history": final_state["conversation_history"],
                "total_iterations": final_state["current_iteration"]
            }
            
        except Exception as e:
            logger.error(f"Error processing {response_id} - {question_key}: {str(e)}")
            return {
                "response_id": response_id,
                "question_key": question_key,
                "error": str(e),
                "final_codes": None
            }
    
    def process_survey_file(self, survey_file_path: str, output_dir: str = "agentic_coding_results") -> List[Dict[str, Any]]:
        """Process an entire survey file"""
        
        # Create output directory
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        
        # Load survey data
        with open(survey_file_path, "r", encoding="utf-8") as f:
            survey_data = json.load(f)
        
        response_id = survey_data.get("ResponseId", "")
        results = []
        
        logger.info(f"Processing survey file: {survey_file_path}")
        
        # Process each question
        for key, value in survey_data.items():
            if key.startswith("Question ") and isinstance(value, dict) and "Response" in value:
                logger.info(f"Processing {key} for response {response_id}")
                
                result = self.process_question(response_id, key, value)
                results.append(result)
                
                # Save individual result
                result_file = output_path / f"{response_id}_{key.replace(' ', '_')}_result.json"
                with open(result_file, "w", encoding="utf-8") as f:
                    json.dump(result, f, indent=2, ensure_ascii=False)
        
        # Save combined results
        combined_file = output_path / f"{response_id}_all_results.json"
        with open(combined_file, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        
        logger.info(f"Completed processing {response_id}. Results saved to {output_path}")
        
        return results


In [None]:
# # Example usage and testing
# def test_agentic_system():
#     """Test the agentic coding system with a sample question"""
    
#     # Initialize the system
#     system = AgenticCodingSystem(model_azure, log_file="test_agreement_logs.txt")
    
#     # Sample question data
#     sample_question_data = {
#         "Question": "Please describe any differences in how it impacted your learning (comparing GenAI and your teacher)?",
#         "Response": "AI tends to be more uplifting or supportive than teachers, so when I ask for its opinion on something I've written it'll compliment whereas my teacher will have edits to my work. Both are useful and important."
#     }
    
#     # Process the question
#     result = system.process_question(
#         response_id="TEST_001",
#         question_key="Question 40",
#         question_data=sample_question_data,
#         max_iterations=3
#     )
    
#     print("Processing Result:")
#     print(json.dumps(result, indent=2))
    
#     return result



In [185]:
# test_result = test_agentic_system()

In [189]:
def process_multiple_surveys(survey_dir: str = "temp_survey", output_dir: str = "agentic_coding_results"):
    """Process multiple survey files using the fixed agentic system"""
    
    # Initialize the fixed system
    system = FixedAgenticCodingSystem(model_azure, log_file=f"{output_dir}/agreement_logs.txt")
    
    # Get survey files
    survey_path = Path(survey_dir)
    survey_files = [f for f in survey_path.glob("*.json") if f.name != ".DS_Store"]
    
    all_results = []
    
    for survey_file in survey_files:
        print(f"Processing {survey_file.name}...")
        
        try:
            # Load survey data
            with open(survey_file, "r", encoding="utf-8") as f:
                survey_data = json.load(f)
            
            response_id = survey_data.get("ResponseId", "")
            results = []
            
            logger.info(f"Processing survey file: {survey_file}")
            
            # Process each question
            for key, value in survey_data.items():
                if key.startswith("Question ") and isinstance(value, dict) and "Response" in value:
                    logger.info(f"Processing {key} for response {response_id}")
                    
                    result = system.process_question_simple(response_id, key, value)
                    results.append(result)
                    
                    # Save individual result
                    result_file = Path(output_dir) / f"{response_id}_{key.replace(' ', '_')}_result.json"
                    with open(result_file, "w", encoding="utf-8") as f:
                        json.dump(result, f, indent=2, ensure_ascii=False)
            
            # Save combined results for this response
            combined_file = Path(output_dir) / f"{response_id}_all_results.json"
            with open(combined_file, "w", encoding="utf-8") as f:
                json.dump(results, f, indent=2, ensure_ascii=False)
            
            all_results.extend(results)
            logger.info(f"Completed processing {response_id}. Results saved to {output_dir}")
            
        except Exception as e:
            print(f"Error processing {survey_file.name}: {str(e)}")
            continue
    
    # Save all results
    combined_file = Path(output_dir) / "all_agentic_results.json"
    with open(combined_file, "w", encoding="utf-8") as f:
        json.dump(all_results, f, indent=2, ensure_ascii=False)
    
    print(f"\nCompleted processing {len(survey_files)} survey files")
    print(f"Total results: {len(all_results)}")
    print(f"Combined results saved to: {combined_file}")
    
    return all_results

# Uncomment to run batch processing
# batch_results = process_multiple_surveys()


In [190]:
batch_results = process_multiple_surveys()

INFO:__main__:Processing survey file: temp_survey/R_4C4j4KNUq9N3VGC.json
INFO:__main__:Processing Question 40 for response R_4C4j4KNUq9N3VGC
INFO:__main__:Processing R_4C4j4KNUq9N3VGC - Question 40
INFO:__main__:Running initial iterations...


Processing R_4C4j4KNUq9N3VGC.json...


INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Logging initial metrics...
INFO:__main__:Making final decision...
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Running researcher validation...
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Processing sur

Error processing R_4C4j4KNUq9N3VGC.json: Object of type HumanMessage is not JSON serializable
Processing R_4CKOoHgU8hp01rg.json...


INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


KeyboardInterrupt: 

INFO:__main__:Processing survey file: temp_survey/R_4C4j4KNUq9N3VGC.json
INFO:__main__:Processing Question 40 for response R_4C4j4KNUq9N3VGC
INFO:__main__:Processing R_4C4j4KNUq9N3VGC - Question 40
INFO:__main__:Running initial iterations...


Processing R_4C4j4KNUq9N3VGC.json...


INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Logging initial metrics...
INFO:__main__:Making final decision...
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Running researcher validation...
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Processing Que

Processing R_4CKOoHgU8hp01rg.json...


INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Logging initial metrics...
INFO:__main__:Making final decision...
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Running researcher validation...
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Processing Que

Processing R_43e00PZDT3YYIZ2.json...


INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Logging initial metrics...
INFO:__main__:Making final decision...
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Running researcher validation...
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Handling feedb

Processing R_4P0PnRK3k4DjAxR.json...


INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 6.000000 seconds
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 2.000000 seconds
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-vers

Processing R_4Ka5vRl3NuBJ1Af.json...


INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 5.000000 seconds
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Logging initial metrics...
INFO:__main__:Making final decision...
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-pre

Processing R_4AYVtTmMd0q7vte.json...


INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 4.000000 seconds
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Logging initial metrics...
INFO:__main__:Making final decision...
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-pre

Processing R_1gTNWbmwxtRxrf8.json...


INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 3.000000 seconds
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 1.000000 seconds
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-vers

Processing R_4sn9pI5HdvhV5gI.json...


INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 4.000000 seconds
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Logging initial metrics...
INFO:__main__:Making final decision...
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-pre

Processing R_1VQ5hmwtdpRmnmu.json...


INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 6.000000 seconds
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 9.000000 seconds
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-vers

Processing R_4aRLJoWmhKsYLOF.json...


INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 6.000000 seconds
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:__main__:Logging initial metrics...
INFO:__main__:Making final decision...
INFO:httpx:HTTP Request: POST https://cic-topic-modeling.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2024-05-01-pre


Completed processing 10 survey files
Total results: 24
Combined results saved to: agentic_coding_results/all_agentic_results.json


In [197]:
# # Fixed version of the AgenticCodingSystem
# class FixedAgenticCodingSystem:
#     """Fixed version of the agentic coding system"""
    
#     def __init__(self, model, log_file: str = "agreement_logs.txt"):
#         self.model = model
#         self.log_file = log_file
        
#         # Initialize agents
#         self.orchestrator = OrchestratorAgent(model)
#         self.researcher = ResearcherAgent(model)
#         self.agreement_calculator = AgreementCalculator()
    
#     def process_question_simple(self, response_id: str, question_key: str, question_data: Dict[str, Any], 
#                                max_iterations: int = 3) -> Dict[str, Any]:
#         """Process a single question through the agentic system (simplified version)"""
        
#         # Determine if it's question 43
#         question_num = question_key.split(" ")[1] if " " in question_key else question_key
#         is_question_43 = (question_num == "43")
        
#         logger.info(f"Processing {response_id} - {question_key}")
        
#         try:
#             # Step 1: Run initial iterations
#             logger.info("Running initial iterations...")
#             iteration_results = []
#             conversation_history = []
            
#             for coder in self.orchestrator.iteration_coders:
#                 result = coder.code_response({
#                     "question_data": question_data,
#                     "is_question_43": is_question_43,
#                     "conversation_history": conversation_history
#                 })
#                 iteration_results.append(result)
                
#                 # Parse the response
#                 parsed_obj, error = parse_iteration_json(result["raw_response"])
#                 result["parsed_response"] = parsed_obj
#                 result["parse_error"] = error
                
#                 # Add to conversation history
#                 if not result.get("error", False):
#                     conversation_history.append(HumanMessage(content=result["raw_response"]))
            
#             # Step 2: Log initial metrics
#             logger.info("Logging initial metrics...")
#             iteration_agreement = self.agreement_calculator.calculate_iteration_agreement(iteration_results)
            
#             # Step 3: Make final decision
#             logger.info("Making final decision...")
#             valid_results = [r for r in iteration_results if not r.get("error", False) and r.get("parsed_response")]
            
#             if not valid_results:
#                 logger.warning("No valid iteration results found")
#                 final_codes = {"error": "No valid coding results"}
#             else:
#                 # Use the orchestrator model to make final decision
#                 decision_prompt = f"""As an expert researcher, you need to make a final decision on the coding for this question based on three iterations.

# Question: {question_data['Question']}
# Student Response: {question_data['Response']}

# Iteration Results:
# """
                
#                 for i, result in enumerate(valid_results, 1):
#                     decision_prompt += f"\nIteration {i}:\n{json.dumps(result['parsed_response'], indent=2)}\n"
                
#                 decision_prompt += f"""

# Please analyze these iterations and provide the final coding decision. Consider:
# 1. Consistency across iterations
# 2. Quality of reasoning
# 3. Adherence to coding framework

# Provide your final decision in the same format: {OUTPUT_FORMAT if not is_question_43 else OUTPUT_FORMAT_43}

# JUST RETURN THE DESIRED OUTPUT, DO NOT ADD ANYTHING ELSE.
# Return a single valid JSON object ONLY.
# No markdown, no backticks, no prose."""

#                 try:
#                     response = self.model.invoke([HumanMessage(content=decision_prompt)])
#                     final_obj, error = parse_iteration_json(response.content)
#                     final_codes = final_obj if final_obj else {"_raw": response.content, "_error": error}
#                 except Exception as e:
#                     logger.error(f"Error in final decision: {str(e)}")
#                     final_codes = {"error": str(e)}
            
#             # Step 4: Researcher validation
#             logger.info("Running researcher validation...")
#             validation_prompt = f"""As a senior researcher, you need to validate the final coding decision for this question.

# Question: {question_data['Question']}
# Student Response: {question_data['Response']}

# Final Coding Decision:
# {json.dumps(final_codes, indent=2)}

# Original Iteration Results:
# """
            
#             for i, result in enumerate(iteration_results, 1):
#                 validation_prompt += f"\nIteration {i}:\n{json.dumps(result.get('parsed_response', {}), indent=2)}\n"
            
#             validation_prompt += """

# Please provide your validation in the following JSON format:
# {
#     "agreement": true/false,
#     "quality_score": 1-5,
#     "feedback": "Detailed feedback explaining your decision",
#     "discrepancies": ["list of major issues if any"],
#     "recommendations": ["list of suggestions for improvement if any"]
# }

# Consider:
# 1. Accuracy of coding according to the framework
# 2. Consistency with iteration results
# 3. Quality of reasoning
# 4. Completeness of analysis
# 5. Adherence to coding strategy

# JUST RETURN THE DESIRED OUTPUT, DO NOT ADD ANYTHING ELSE.
# Return a single valid JSON object ONLY.
# No markdown, no backticks, no prose."""

#             try:
#                 response = self.model.invoke([HumanMessage(content=validation_prompt)])
#                 validation_obj, error = parse_iteration_json(response.content)
                
#                 if validation_obj:
#                     researcher_feedback = validation_obj
#                     needs_feedback = not validation_obj.get("agreement", False)
#                 else:
#                     logger.error(f"Failed to parse researcher validation: {error}")
#                     researcher_feedback = {"agreement": True, "error": error}
#                     needs_feedback = False
                    
#             except Exception as e:
#                 logger.error(f"Error in researcher validation: {str(e)}")
#                 researcher_feedback = {"agreement": True, "error": str(e)}
#                 needs_feedback = False
            
#             # Step 5: Handle feedback loop if needed
#             current_iteration = 1
#             if needs_feedback and current_iteration < max_iterations:
#                 logger.info("Handling feedback loop...")
#                 feedback = researcher_feedback.get("feedback", "")
                
#                 # Run new iterations with feedback
#                 new_iteration_results = []
#                 for coder in self.orchestrator.iteration_coders:
#                     result = coder.code_with_feedback({
#                         "question_data": question_data,
#                         "is_question_43": is_question_43,
#                         "conversation_history": conversation_history
#                     }, feedback)
#                     new_iteration_results.append(result)
                    
#                     # Parse the response
#                     parsed_obj, error = parse_iteration_json(result["raw_response"])
#                     result["parsed_response"] = parsed_obj
#                     result["parse_error"] = error
                    
#                     # Add to conversation history
#                     if not result.get("error", False):
#                         conversation_history.append(HumanMessage(content=result["raw_response"]))
                
#                 iteration_results = new_iteration_results
#                 current_iteration += 1
                
#                 # Recalculate agreement
#                 iteration_agreement = self.agreement_calculator.calculate_iteration_agreement(iteration_results)
            
#             # Step 6: Final logging
#             timestamp = datetime.now().isoformat()
#             log_entry = {
#                 "timestamp": timestamp,
#                 "response_id": response_id,
#                 "question_key": question_key,
#                 "iteration": current_iteration,
#                 "iteration_agreement": iteration_agreement,
#                 "researcher_feedback": researcher_feedback,
#                 "needs_feedback": needs_feedback
#             }
            
#             # Append to log file
#             with open(self.log_file, "a", encoding="utf-8") as f:
#                 f.write(json.dumps(log_entry) + "\n")
            
#             return {
#                 "response_id": response_id,
#                 "question_key": question_key,
#                 "final_codes": final_codes,
#                 "iteration_results": iteration_results,
#                 "researcher_feedback": researcher_feedback,
#                 "agreement_metrics": {
#                     "iteration_agreement": iteration_agreement,
#                     "timestamp": timestamp
#                 },
#                 "conversation_history": conversation_history,
#                 "total_iterations": current_iteration
#             }
            
#         except Exception as e:
#             logger.error(f"Error processing {response_id} - {question_key}: {str(e)}")
#             return {
#                 "response_id": response_id,
#                 "question_key": question_key,
#                 "error": str(e),
#                 "final_codes": None
#             }

# # Test the fixed system
# def test_fixed_agentic_system():
#     """Test the fixed agentic coding system"""
    
#     try:
#         # Initialize the system
#         system = FixedAgenticCodingSystem(model_azure, log_file="test_agreement_logs_fixed.txt")
        
#         # Sample question data
#         sample_question_data = {
#             "Question": "Please describe any differences in how it impacted your learning (comparing GenAI and your teacher)?",
#             "Response": "AI tends to be more uplifting or supportive than teachers, so when I ask for its opinion on something I've written it'll compliment whereas my teacher will have edits to my work. Both are useful and important."
#         }
        
#         # Process the question
#         result = system.process_question_simple(
#             response_id="TEST_001",
#             question_key="Question 40",
#             question_data=sample_question_data,
#             max_iterations=3
#         )
        
#         print("Processing Result:")
#         print(json.dumps(result, indent=2))
        
#         return result
        
#     except Exception as e:
#         print(f"Error in test_fixed_agentic_system: {str(e)}")
#         import traceback
#         traceback.print_exc()
#         return None

# # Run the fixed test
# test_result_fixed = test_fixed_agentic_system()


In [196]:
agentic_comparison_df = compare_agentic_human_codings(
    'agentic_coding_results/all_agentic_results.json',
    'temp_survey'
)

agentic_comparison_df

Unnamed: 0,ResponseId,Question,Human_Actor_Comparator,Human_Characteristics,Agentic_Actor_Comparator,Agentic_Characteristics,Agentic_Themes,Agentic_SubThemes,Agentic_Reasonings,Researcher_Agreement,Quality_Score,Total_Iterations,Iteration_Agreement
0,R_4C4j4KNUq9N3VGC,Question 40,"[A-more, T-more, T-more]","[Speed, Personal, In-depth]","[A-More, T-More, T-More]","[Speed, Personal, Understanding]","[Processes, Relational, Sense-making]","[Access, Relational, Sense-making]",[The phrase 'GenAI gives instant answers anyti...,True,5,1,0.333333
1,R_4C4j4KNUq9N3VGC,Question 41,"[A-more, T-more]","[No impact, Positive]","[A-More, T-More, T-More]","[Less effort, Personal, Positivity]","[Processes, Relational, Information]","[Effort, Relational, Tone]",[The term 'efficient' suggests that GenAI feed...,True,5,1,0.833333
2,R_4C4j4KNUq9N3VGC,Question 42,"[A-more, T-more, T-more]","[Speed, Specificity, Personal]","[A-More, A-More, T-More, T-More]","[Speed, Objective, In-depth, Personal]","[Processes, Information, Information, Relational]","[Access, Quality, Quality, Relational]",[The phrase 'GenAI feedback was quicker' direc...,True,5,1,0.333333
3,R_4CKOoHgU8hp01rg,Question 40,"[A-more, A-less]","[Speed, In-depth]","[A-More, A-Less, T-More, T-More, A-Less, T-Mor...","[Speed, In-depth, In-depth, Understanding, Per...","[Processes, Information, Information, Sense-ma...","[Access, Quality, Quality, Understanding, Rela...",[The phrase 'Provides instant feedback' direct...,True,5,1,0.904762
4,R_4CKOoHgU8hp01rg,Question 41,[],[],[],[],[],[],[],True,5,1,1.0
5,R_4CKOoHgU8hp01rg,Question 42,"[A-more, T-more, A-less, T-more]","[Speed, Contextualised, Relevance, Personal]","[A-Less, T-More, T-More, A-More, A-Less, T-Mor...","[In-depth, Specificity, Personal, Speed, Conte...","[Information, Information, Relational, Process...","[Quality, Quality, Relational, Access, Quality...","[The statement 'sometimes lacks detailed, pers...",True,5,1,1.0
6,R_43e00PZDT3YYIZ2,Question 43,[],"[Unaware, Contextualised]",,"[Unaware, Contextualised, Unaware]","[Processes, Information, Processes]","[Access, Quality, Access]",[The student states 'I didn't know how / if ge...,False,3,2,1.0
7,R_4P0PnRK3k4DjAxR,Question 40,[A-more],[Ease],"[A-More, T-Less]","[Ease, Personal]","[Processes, Information]","[Access, Quality]",[The phrase 'more convenient/timely to get fee...,False,3,2,1.0
8,R_4P0PnRK3k4DjAxR,Question 41,[A-more],[Understanding],[A-More],[Understanding],[Sense-making],[Sense-making],[The statement 'you can ask the AI to explain ...,True,5,1,0.333333
9,R_4P0PnRK3k4DjAxR,Question 42,[],[],[],[],[],[],[],True,5,1,1.0


## 0. Prompts

In [156]:
aim_research = """

                The aim of this study is to investigate how university students use, value, and trust feedback generated by Generative Artificial Intelligence (GenAI) 
                compared to feedback from teachers. Specifically, the research seeks to understand the different roles that GenAI and teacher feedback play in 
                higher education, the factors that influence students’ perceptions of their usefulness, reliability, and trustworthiness, and the ways in which these 
                forms of feedback shape students’ learning experiences, emotions, and engagement with feedback processes.  

                """

actor_codes =  """
                # Code: A  

                **Explanation:** GenAI (also variants such as AI, ChatGPT, etc.)  

                ---

                # Code: T  

                **Explanation:** Teacher (also variants such as lecturer, tutor, staff, etc.)  

                ---

                # Code: B  

                **Explanation:** Both (clear reference to both actors)  

              """
               
comparator_codes = """

                    # Code: More  

                    **Explanation:** When the comment indicated that the actor was associated with more (volume, frequency, impact) of the characteristic.   

                    ---

                    # Code: Less  

                    **Explanation:** As above but in reference to less or smaller volume, frequency or impact of the characteristic.  

                    ---

                    # Code: Similar  

                    **Explanation:** When the comment made an explicit statement that both GenAI and Teachers were similar in some way (e.g., they were similarly useful).  


                    """
                    
characteristic_codes = """
                        # Theme: Processes  

                        ## Sub-theme: Access  

                        ### Code: Ease  
                        **Definition:** Is easily accessible, available, or convenient.  

                        ### Code: Speed  
                        **Definition:** Is immediate, instant or fast. Described as time-efficient, saving time.  

                        ### Code: Volume  
                        **Definition:** Reference to quantity, volume, scale of information or feedback interactions (e.g., could ask many questions and get lots of responses).  

                        ## Sub-theme: Timing  

                        ### Code: Before submission  
                        **Definition:** Provides information during the production of an assignment task.  

                        ### Code: After submission  
                        **Definition:** Provides information after the submission of an assignment task.  

                        ## Sub-theme: Effort  

                        ### Code: Less effort  
                        **Definition:** Reduces effort associated with the task. This may be making a task easier in some way, or it may refer to doing the task entirely.  

                        ---

                        # Theme: Sense-making  

                        ## Sub-theme: Sense-making 

                        ### Code: Understanding  
                        **Definition:** Helps me understand.  

                        ### Code: Reflection  
                        **Definition:** Helps me reflect.  

                        ### Code: Progress    
                        **Definition:** Helps me know how I’m going.  

                        ---

                        # Theme: Information  

                        ## Sub-theme: Quality  

                        ### Code: Specificity  
                        **Definition:** Provides specific or detailed information. Antonyms: vague, generic.  

                        ### Code: In-depth  
                        **Definition:** Provides in-depth or nuanced information. Antonyms: superficial, broad.  

                        ### Code: Understandable  
                        **Definition:** Information is presented in an understandable, digestible, and comprehensible way.  

                        ### Code: Relevance  
                        **Definition:** Information is relevant, on topic.  

                        ### Code: Contextualised  
                        **Definition:** Provides information that is adapted for the context of an assignment, rubrics, discipline, or class. (Not relational).  

                        ### Code: Utility  
                        **Definition:** Provides useful, usable, or helpful information.  

                        ### Code: Reliable  
                        **Definition:** Provides information that is accurate, precise, reliable, or trustworthy.  

                        ### Code: Objective  
                        **Definition:** Is objective in its feedback, judgment, or evaluation. Antonyms: subjective, biased.  

                        ## Sub-theme: Tone  

                        ### Code: Positivity  
                        **Definition:** Makes positive statements, people pleaser

                        ### Code: Negativity  
                        **Definition:** Makes edits or provides statements that are perceived as negative in tone (e.g. dismissive, insulting, uncaring).  

                        ---

                        # Theme: Feeling  
                        
                        ## Sub-theme: Feeling  

                        ### Code: Positive  
                        **Definition:** Makes me feel a positive feeling. Synonyms: Encouraging, motivating.  

                        ### Code: Negative  
                        **Definition:** Makes me feel a negative feeling. Synonyms: Frustrating. 

                        ### Code: No impact
                        **Definition:** Makes me feel nothing or indifferent. Has no impact on my emotions or feelings.

                        ---

                        # Theme: Relational  

                        ## Sub-theme: Relational
                         
                        ### Code: Personal   
                        **Definition:** Is supportive. Concerns relational type statements (e.g. the “teacher knows me”’). Closer/ more-distant or embodied (e.g. “I like being face to face with my teacher”). Some statements are about AI being more in tune, more personalised, personal to tastes in language, communication and "learning styles".

                        ### Code: Risky  
                        **Definition:** Makes me vulnerable to their judgement, causes me shame. Possibly related to power, position, self. Synonyms: Implies personal consequence, at risk. Antonyms: non-judgemental, no shame, preserves self, prevents vulnerability

                        ### Code: Expert  
                        **Definition:** Has a position of knowledge or expertise.  

                        ---

                        # Theme: Value  

                        ## Sub-theme: Value  

                        ### Code: Importance 
                        **Definition:** Is important or valuable.  

"""

characteristic_codes_v2 = """

            # Theme: Processes  

            ## Sub-theme: Access  

            ### Code: Ease  
            **Definition:** Refers to the convenience of accessing feedback or support. It captures situations where obtaining assistance is straightforward, uncomplicated, and does not involve significant barriers compared to traditional sources like teachers or professors.  

            ### Code: Speed  
            **Definition:** Describes the immediacy of receiving responses or feedback. It highlights efficiency and time-saving aspects, where support is delivered rapidly enough to accelerate the learning or task process.  

            ### Code: Volume  
            **Definition:** Relates to the quantity of feedback or information available. It reflects the ability to engage in multiple interactions, ask numerous questions, and receive abundant responses compared to more limited human feedback.  

            ## Sub-theme: Timing  

            ### Code: Before submission  
            **Definition:** Feedback is provided during the drafting or preparation stage of an assignment. This allows for real-time adjustments, improvements, and refinements prior to final submission.  

            ### Code: After submission  
            **Definition:** Feedback is received after an assignment has been submitted and graded. While it can inform future work, it does not directly impact the already completed task.  

            ## Sub-theme: Effort  

            ### Code: Less effort  
            **Definition:** Highlights a reduction in the amount of work, energy, or cognitive load required to complete a task. This includes simplifying processes, providing starting points, or removing barriers that typically demand more effort from the student.  

            ---

            # Theme: Sense-making  

            ## Sub-theme: Sense-making  

            ### Code: Understanding  
            **Definition:** Enables students to better comprehend concepts, ideas, or tasks. It involves organising thoughts, clarifying confusion, and making content more digestible for learning.  

            ### Code: Reflection  
            **Definition:** Encourages self-examination and critical thinking about one’s own learning, performance, or perspective. It involves generating insights that prompt deeper consideration of personal approaches.  

            ### Code: Progress    
            **Definition:** Provides indicators of how well a student is performing or advancing. It supports self-monitoring by offering feedback that signals whether learning or task outcomes are on track.  

            ---

            # Theme: Information  

            ## Sub-theme: Quality  

            ### Code: Specificity  
            **Definition:** Refers to feedback or information that is detailed, concrete, and precise rather than vague or generic.  

            ### Code: In-depth  
            **Definition:** Captures the extent to which feedback is comprehensive, nuanced, and layered. It contrasts with broad or superficial responses by offering thorough explanations or contextual depth.  

            ### Code: Understandable  
            **Definition:** Information is communicated in a way that is clear, simple, and easy to grasp. This includes structuring content so that it is digestible and accessible to the learner.  

            ### Code: Relevance  
            **Definition:** Feedback or information aligns closely with the student’s needs, topic, or task requirements, avoiding off-topic or tangential content.  

            ### Code: Contextualised  
            **Definition:** Feedback is adapted to the specific assignment, discipline, or criteria. It demonstrates awareness of situational requirements rather than offering generic guidance.  

            ### Code: Utility  
            **Definition:** Information is practical, usable, and directly helpful in solving problems, advancing tasks, or exploring new ideas.  

            ### Code: Reliable  
            **Definition:** Feedback is characterised by trustworthiness, accuracy, and precision. It avoids misleading or erroneous information.  

            ### Code: Objective  
            **Definition:** Feedback is impartial and free of personal bias. It is grounded in factual or consistent standards rather than subjective judgments or preferences.  

            ## Sub-theme: Tone  

            ### Code: Positivity  
            **Definition:** Feedback is encouraging, affirming, or expressed in a supportive tone. It creates a positive interpersonal dynamic, often aligning with a people-pleasing or agreeable style.  

            ### Code: Negativity  
            **Definition:** Feedback is expressed in a critical or discouraging tone, sometimes perceived as dismissive, harsh, or uncaring.  

            ---

            # Theme: Feeling  

            ## Sub-theme: Feeling  

            ### Code: Positive  
            **Definition:** Feedback evokes feelings of encouragement, motivation, or appreciation. It creates a sense of being supported, valued, or cared for.  

            ### Code: Negative  
            **Definition:** Feedback generates negative emotions such as frustration, discouragement, or shame. It may undermine confidence or self-worth.  

            ### Code: No impact  
            **Definition:** Feedback is emotionally neutral, leaving the student indifferent. It does not trigger any significant positive or negative emotional response.  

            ---

            # Theme: Relational  

            ## Sub-theme: Relational  

            ### Code: Personal   
            **Definition:** Feedback reflects personalisation and relational support. It demonstrates awareness of individual needs, progress, or preferences, fostering a sense of connection or tailored guidance.  

            ### Code: Risky  
            **Definition:** Feedback situations evoke vulnerability or fear of judgment. It reflects perceived risks to self-esteem, identity, or safety in the learning relationship.  

            ### Code: Expert  
            **Definition:** Feedback is grounded in knowledge, authority, and subject expertise. It reflects the credibility and informed position of the source.  

            ---

            # Theme: Value  

            ## Sub-theme: Value  

            ### Code: Importance 
            **Definition:** Captures the perceived significance or weight given to feedback. It reflects judgments about which sources of feedback are most valuable or influential in shaping learning outcomes.  


"""
                        
CODING_STRATEGY = """
    When coding student responses:

    1. **Surface-Level Understanding**  
    - Do not over-interpret student responses.  
    - Work from the most direct, surface-level meaning of the text.  
    - Avoid reading beyond what is explicitly written.

    2. **Chunk-Based Coding**  
    - Divide each response into meaningful chunks (a sentence or a significant phrase).  
    - A chunk should only be coded once, even if it could theoretically fit multiple codes.

    3. **Focus on Comparison**  
    - Prioritize identifying any comparison between AI feedback and teacher feedback.  
    - Look for explicit or implicit comparative language such as *more, less, better, worse, easier, harder, similar*.  

    4. **Actor + Comparator + Characteristic Logic**  
    - For each usable chunk, identify three components:  
        **Actor** – Who or what the comment refers to (A = AI, T = Teacher, B = Both).  
        **Comparator** – The relative position (e.g., more, less, similar, better, worse).  
        **Characteristic** – The feedback property being described (e.g., clarity, accuracy, relevance, timeliness, emotional safety).  
    - A valid coded chunk must include all three components.  

    5. **Handling Unusable Responses**  
    - If a response is unintelligible or does not appear to answer the question, code it as **UNUSABLE**.  
    - If a chunk is missing **any one** of the three required components (Actor, Comparator, Characteristic), code it as **UNUSABLE**.  
    - Example of unusable: "It was better" – we don’t know *which actor* it refers to.  

"""

CODING_STRATEGY_V2 = """

    When coding student responses:
    1. **Surface-Level Understanding**  
    - Keep interpretations minimal: code *only* what is directly and explicitly stated.  
    - Do not infer hidden meanings, motivations, or consequences.  

    2. **Chunk-Based Coding (Restrained)**  
    - Divide a response only if there are **clear shifts in meaning** (e.g., “while,” “but,” “however”).  
    - Avoid splitting into too many micro-chunks. A whole sentence is usually a single chunk.  
    - **Max 2 chunks per sentence** unless multiple actors (AI vs Teacher) are explicitly compared.

    3. **Salience over Exhaustiveness**  
    - Prioritize **main comparisons** or feelings.  
    - Do not code every possible nuance if it dilutes the focus.  
    - Example: If a chunk has both a tone (“neutral”) and an efficiency descriptor (“efficient”), select the **dominant characteristic** (in this case, tone/feeling = “No impact”).

    4. **Actor + Comparator + Characteristic Rule**  
    - A valid code requires all three: Actor (A/T/B), Comparator (more/less/similar), and Characteristic (e.g., clarity, personal, timeliness).  
    - If multiple characteristics appear but relate to the *same actor/comparator*, treat them as **one code**.  
    - Example: “Teacher’s feedback was personal and encouraging” → single code: Actor = Teacher, Comparator = More, Characteristic = Personal/Positive.  

    5. **Dominant Coding Principle**  
    - When multiple possible codes exist in the same chunk, select the **most prominent or emotionally central one**.  
    - Example: “GenAI felt neutral and efficient” → dominant code = “No impact” (feeling tone). “Efficient” is secondary and not coded separately.

    6. **Handling Unusable Responses**  
    - If actor, comparator, or characteristic is missing, mark as **UNUSABLE**.  
    - Example: “It was better” → UNUSABLE.  
    - Example: “Feedback was neutral” (no comparison between actors) → UNUSABLE.  


"""

OUTPUT_FORMAT = """
    {
        "codes": [
            {   
                "actor": "...",
                "comparator": "...",
                "theme": "...",
                "sub_theme": "...",
                "code": "...",
                "reasoning": "..."
            }
        ]
    }
"""

SHOTS = """

    Examples of a good output are the next ones:

    Example 1:

    Question: "Please describe any differences in how it impacted your learning (comparing GenAI and your teacher)?"

    Student response: "AI tends to be more uplifting or supportive than teachers, so when I ask for its opinion on something I’ve written it’ll compliment whereas my teacher will have edits to my work. Both are useful and important."

    GOOD OUTPUT:

    {
        "codes": [
            {   
                "actor": "A",
                "comparator": "More",
                "theme": "Information",
                "sub_theme": "Tone",
                "code": "Positivity",
                "reasoning": "The phrases 'more uplifting or supportive' and 'it’ll compliment' directly indicate positive affective framing. By definition, the Positivity code captures feedback that is polite, supportive, or 'people-pleasing', which matches the student’s description of AI’s tone relative to teachers."
            },
            {   
                "actor": "T",
                "comparator": "More",
                "theme": "Information",
                "sub_theme": "Quality",
                "code": "Utility",
                "reasoning": "The contrast 'AI will compliment whereas my teacher will have edits' signals that teacher feedback contains actionable, text-level changes. This aligns with the Utility code which captures feedback that is practical and useful for making concrete improvements, as evidenced by the teacher providing specific edits rather than just praise."
            },
            {
                "actor": "B",
                "comparator": "Similar",
                "theme": "Value",
                "sub_theme": "Value",
                "code": "Importance",
                "reasoning": "The closing statement 'Both are useful and important' is an explicit equivalence claim about value. The Importance code captures perceived significance; here the student assigns comparable importance to both AI and teacher feedback."
            }
        ]
    }

    Example 2:

    Question: "Please describe any differences in how it impacted your learning (comparing GenAI and your teacher)?"

    Student response: "Provides more immediate feedback while working on assignments when the teacher wasn't yet able to."

    GOOD OUTPUT:

    {
        "codes": [
            {   
                "actor": "A",
                "comparator": "More",
                "theme": "Processes",
                "sub_theme": "Access",
                "code": "Speed",
                "reasoning": "The term 'more immediate' directly maps to the Speed code (immediate/instant/time-efficient). The temporal clause 'while working on assignments' highlights availability during production (pre‑submission), reinforcing that AI’s response time outpaces the teacher’s in this context."
            }
        ]
    }

    Example 3:

    Question: "Were there any differences in how the feedback made you feel (comparing GenAI and your teacher)?"

    Student response: "AI made me feel good and teachers make me feel bad and dumb."

    GOOD OUTPUT:

    {
        "codes": [
            {   
                "actor": "A",
                "comparator": "More",
                "theme": "Feeling",
                "sub_theme": "Feeling",
                "code": "Positive",
                "reasoning": "The literal statement 'AI made me feel good' is a direct indicator of positive emotional impact. The Positive code captures encouraging or motivating feelings, which the student explicitly attributes to AI in contrast to teachers."
            },
            {   
                "actor": "T",
                "comparator": "More",
                "theme": "Feeling",
                "sub_theme": "Feeling",
                "code": "Negative",
                "reasoning": "The student reports teachers 'make me feel bad and dumb', which is an explicit negative affective response. The Negative code covers discouraging or frustrating emotions, indicating a stronger negative impact from teacher feedback relative to AI."
            }
        ]
    }

"""

reasons_codes = """

                # Theme: Processes

                ## Sub-theme: Access

                ### Code: Unaware
                **Definition:** Respondents didn’t know GenAI could provide feedback; or didn’t know how to use it; or didn’t think of using it.  

                ## Sub-theme: Effort

                ### Code: Effortful
                **Definition:** Too much effort to use AI or learn how to use AI; don’t have time to use AI (e.g., last-minute assignments).  

                ### Code: Less effort
                **Definition:** Off-loads effort or experience of learning, which in this context is not desirable.  

                ---

                # Theme: Information

                ## Sub-theme: Quality

                ### Code: Specificity
                **Definition:** Provides specific or detailed information. Antonyms: vague, generic.  

                ### Code: In-depth
                **Definition:** Not enough, or sometimes not useful because it offers too much depth or additional explanation than desired.  

                ### Code: Contextualised
                **Definition:** Information provided lacks the context of an assignment, rubrics, discipline or class.  

                ### Code: Utility
                **Definition:** Useful, usable, or helpful information.  

                ### Code: Trustworthy
                **Definition:** Respondent does not trust the information; it is not reliable, or the provider/source is not competent to provide trustworthy information.  

                ## Sub-theme: Tone

                ### Code: Positivity
                **Definition:** Makes positive statements; people pleaser.  

                ---

                # Theme: Relational

                ## Sub-theme: Relational

                ### Code: Personal
                **Definition:** Respondent values relationship or closer and embodied feedback; values the role or connection with humans.  

                ### Code: Expert
                **Definition:** Has a position of knowledge or expertise.  

                ---

                # Theme: Value

                ## Sub-theme: Value

                ### Code: Preference
                **Definition:** Respondent preferred not using GenAI without specifying further reasons or expressing dislike.  

                ### Code: Need
                **Definition:** Does not want or need feedback; does not need feedback from AI (own skills or others are sufficient); no necessity to use AI for feedback.  

                ### Code: Unsustainable
                **Definition:** Respondent finds GenAI is not environmentally sustainable.  

                ### Code: Privacy
                **Definition:** Respondent reports concern regarding privacy.  

                ### Code: Integrity
                **Definition:** Respondent committed to the originality of own work and ethical behaviour in academic work.  

"""

reasons_codes_v2 = """

        # Theme: Processes  

        ## Sub-theme: Access  

        ### Code: Unaware  
        **Definition:** Captures situations where students are unaware that GenAI can provide feedback, lack knowledge of how to use it, or simply do not consider using it. It reflects both unawareness of the tool’s potential and uncertainty about its operation.  

        ## Sub-theme: Effort  

        ### Code: Effortful  
        **Definition:** Highlights the perception that using GenAI requires excessive time or energy, either in learning how to use it effectively or in applying it under time pressure. This effort outweighs perceived benefits, particularly in rushed academic contexts.  

        ### Code: Less effort  
        **Definition:** Reflects concerns that using GenAI reduces the necessary effort involved in learning. Students perceive that offloading work to AI undermines the personal challenge, skill-building, and intellectual growth required for academic and professional development.  

        ---

        # Theme: Information  

        ## Sub-theme: Quality  

        ### Code: Specificity  
        **Definition:** Emphasises the need for feedback that is detailed and tailored to the student’s work. AI-generated responses are perceived as too generic or vague compared to the specific input students seek.  

        ### Code: In-depth  
        **Definition:** Reflects dissatisfaction with the quality of feedback when AI provides overly broad or superficial input, lacking nuance, or alternatively giving excessive depth that is not practical or useful for the task.  

        ### Code: Contextualised  
        **Definition:** Highlights the limitation of AI feedback when it lacks alignment with assignment instructions, rubrics, or disciplinary expectations. Students value feedback that is situated in the academic context, which AI cannot fully replicate.  

        ### Code: Utility  
        **Definition:** Concerns the practical usefulness of AI-generated feedback. Students report that AI often fails to provide relevant or actionable guidance, limiting its effectiveness for improving their work.  

        ### Code: Trustworthy  
        **Definition:** Reflects doubts about the accuracy, reliability, and legitimacy of AI feedback. Students question the competence of AI as a source of evaluation and express distrust due to perceived biases or lack of accountability.  

        ## Sub-theme: Tone  

        ### Code: Positivity  
        **Definition:** Identifies AI’s tendency to adopt an overly agreeable or affirming tone. Feedback is seen as flattering or “people-pleasing,” potentially at the expense of critical or constructive input.  

        ---

        # Theme: Relational  

        ## Sub-theme: Relational  

        ### Code: Personal  
        **Definition:** Emphasises the value students place on human interaction and relational qualities in feedback. They view feedback as more meaningful when it comes from people who understand them personally, can express emotion, and provide embodied, human connection.  

        ### Code: Expert  
        **Definition:** Reflects the importance of feedback coming from recognised experts, such as tutors or lecturers. Students perceive human expertise as superior to AI, due to its grounding in disciplinary knowledge and professional experience.  

        ---

        # Theme: Value  

        ## Sub-theme: Value  

        ### Code: Preference  
        **Definition:** Expresses a general preference for not using AI for feedback, often without detailed justification. Students report personal dislike or lack of interest in AI as a feedback source.  

        ### Code: Need  
        **Definition:** Indicates that students feel no necessity for AI feedback, either because their existing human feedback sources are sufficient or because they are confident in their own skills.  

        ### Code: Unsustainable  
        **Definition:** Reflects ethical or environmental concerns about the use of AI. Students reject AI feedback due to its perceived ecological cost or association with unsustainable and unethical practices.  

        ### Code: Privacy  
        **Definition:** Concerns about the security and confidentiality of student work when submitted to AI platforms. Students fear loss of control over their data, possible misuse, or breaches of academic confidentiality.  

        ### Code: Integrity  
        **Definition:** Highlights a strong commitment to academic honesty and originality. Students reject AI feedback to avoid compromising their learning, breaking institutional rules, or engaging in behaviour perceived as unethical or academically dishonest.  


"""
                
OUTPUT_FORMAT_43 =  """

    {
        "reasons": [
            {   
                "theme": "...",
                "sub_theme": "...",
                "code": "...",
                "reasoning": "..."
            }
        ]
    }                        
                    
                    
                    """
                    
SHOTS_43 = """

Examples of good output:

Example 1:

Question: "Why didn’t you use GenAI for feedback on your work?"

Student response: "I didn’t even know that this could be done. I also prefer to use my brain more to have creativity and originality in my work, studies and personal life."

Good output:

{
    "reasons": [
        {   
            "theme": "Processes",
            "sub_theme": "Access",
            "code": "Unaware",
            "reasoning": "The clause 'I didn’t even know that this could be done' is a direct admission of not knowing GenAI can provide feedback. The Unaware code explicitly covers respondents who did not know GenAI could do this or how to use it, so the statement maps precisely to Processes → Access → Unaware."
        },
        {
            "theme": "Value",
            "sub_theme": "Value",
            "code": "Preference",
            "reasoning": "The student states a normative stance—'I prefer to use my brain… for creativity and originality'—which indicates an intentional choice to avoid GenAI irrespective of its capabilities. This aligns with the Preference code, which captures a stated preference not to use GenAI without invoking a technical limitation."
        }
    ]
}

Example 2:

Question: "Why didn’t you use GenAI for feedback on your work?"
Student response: "I don't believe GenAI can provide more valuable feedback than I could get from a person proofreading it. I would rather ask a friend or family member to read what I've written."
Good output:
{
    "reasons": [
        {   
            "theme": "Information",
            "sub_theme": "Quality",
            "code": "Trustworthy",
            "reasoning": "By asserting 'I don't believe GenAI can provide more valuable feedback' and preferring human proofreaders, the student signals skepticism about GenAI’s credibility and evaluative competence. The Trustworthy code captures doubts about accuracy/reliability of information or the source’s ability to provide sound feedback, which is exactly what this statement conveys."
        }
    ]
}
"""

CODING_STRATEGY_43 = """

                        1) Scope & Principle

                        Goal: Identify explicit reasons for not using GenAI.

                        Surface-level only: Code the direct, literal meaning; do not infer beyond what is written.

                        2) Chunking

                        Split the response into meaningful chunks (sentence or key clause).

                        Assign one code per chunk. If multiple codes could apply, choose the most specific.

                        3) Required Fields

                        For each usable chunk, produce one reason with:

                        theme: Processes | Information | Tone | Relational | Value

                        sub_theme: Use the correct sub-theme; if not stated, repeat the theme (i.e., sub_theme = theme).

                        code: The specific characteristic (e.g., Unaware, Effortful, Less effort, Contextualised, Trustworthy, Privacy, Integrity, Unsustainable, Personal, Expert, Preference, Need, Positivity).

                        reasoning: Brief justification that quotes or precisely references the triggering phrase(s) and ties them to the code definition.
                        
                        4) Usability Rules

                        Usable: The chunk clearly states a reason for non-use (e.g., didn’t know, too hard/time, don’t trust, lacks context, not helpful, prefer human, no need, privacy, integrity, environmental concerns, style dislike).

                        UNUSABLE: Unintelligible, off-topic, or no reason stated.

                    """

In [157]:
system_prompt = f"""

    You're a expert qualitative researcher analyzing a student's response to a survey. Your task is to code the student's response following the coding strategy provided.

    The aim of the research is {aim_research}

    The coding strategy is {CODING_STRATEGY_V2}

    The code for the actors is {actor_codes}

    The code for the comparators is {comparator_codes}

    The code for the characteristics is {characteristic_codes_v2}

    The output format is {OUTPUT_FORMAT}

    Some examples of good outputs are:

    {SHOTS}

"""

system_prompt_43 = f"""

    You're a expert qualitative researcher analyzing a student's response to a survey. Your task is to code the student's response following the coding strategy provided.

    The aim of the research is {aim_research}

    The coding strategy is {CODING_STRATEGY_43}

    The code for the themes is {reasons_codes_v2}

    The output format is {OUTPUT_FORMAT_43}

    Some examples of good outputs are:

    {SHOTS_43}
"""

initial_prompt = f"""

    Based on the coding framework, the coding strategy, the output format, and the shots, analyse the student's response.
    
    Code all the responses, and output the results in the format {OUTPUT_FORMAT}.
    
    Keep the order of the chunks as they are in the student's response. If you code several chunks, keep the order of the chunks as they are in the student's response.
    
    JUST RETURN THE DESIRED OUTPUT, DO NOT ADD ANYTHING ELSE.
    "Return a single valid JSON object ONLY. "
    "No markdown, no backticks, no prose. "

"""

initial_prompt_43 = f"""

    Based on the coding framework, the coding strategy, the output format, and the shots, analyse the student's response.
    
    Code all the responses, and output the results in the format {OUTPUT_FORMAT_43}.
    
    Keep the order of the chunks as they are in the student's response. If you code several chunks, keep the order of the chunks as they are in the student's response.
    
    JUST RETURN THE DESIRED OUTPUT, DO NOT ADD ANYTHING ELSE.
    "Return a single valid JSON object ONLY. "
    "No markdown, no backticks, no prose. "

"""

## 1. Packages

In [158]:
from dotenv import load_dotenv
from langchain_core.messages import HumanMessage
from langchain_openai import AzureChatOpenAI
from openai import AzureOpenAI
from pathlib import Path
from typing import Dict, List, Any
import json
import os
import pandas as pd

In [159]:
load_dotenv()  # take environment variables from .env.

True

## 2. API Call

In [160]:
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
OPENAI_SECRET_ACCESS_KEY = os.getenv("OPENAI_SECRET_ACCESS_KEY")
OPENAI_DEFAULT_REGION = "australiaeast"
AZURE_OPENAI_ENDPOINT = "https://cic-topic-modeling.openai.azure.com/"
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME = "chat"
SET_TEMP = 0.00000001


model_azure = AzureChatOpenAI(
    api_key=AZURE_OPENAI_API_KEY,
    api_version="2024-05-01-preview",
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    model = "gpt-4-32k", #gpt-4o model by default
    temperature = SET_TEMP
)

In [161]:
# Simple test to check if the Azure OpenAI API is working

def test_openai_api(model):
    try:
        response = model.invoke([HumanMessage(content="Say hello!")])
        print("API call successful. Response:")
        print(response.content)
    except Exception as e:
        print("API call failed.")
        print(e)

# Run the test
test_openai_api(model_azure)


API call successful. Response:
Hello there! 😊 How can I assist you today?


## 3. Workflow

In [None]:
from pathlib import Path
import json

import json, re
from typing import Tuple, Any

_CODEFENCE_RE = re.compile(r"^\s*```(?:json)?\s*(.*?)\s*```\s*$", re.IGNORECASE | re.DOTALL)

def _strip_codefences(s: str) -> str:
    m = _CODEFENCE_RE.match(s)
    return m.group(1).strip() if m else s

def _extract_json_segment(s: str) -> str:
    """Grab the innermost JSON-looking block if extra text surrounds it."""
    s = s.strip()
    # Prefer object
    lo, ro = s.find("{"), s.rfind("}")
    if 0 <= lo < ro:
        return s[lo:ro+1]
    # Fallback to array
    la, ra = s.find("["), s.rfind("]")
    if 0 <= la < ra:
        return s[la:ra+1]
    return s  # last resort

def _soft_fixes(s: str) -> str:
    # Normalize smart quotes
    s = s.replace("“", '"').replace("”", '"').replace("’", "'")
    # Remove trailing commas before } or ]
    s = re.sub(r",\s*([}\]])", r"\1", s)
    return s

def parse_iteration_json(raw: str) -> Tuple[Any, str]:
    """
    Returns (obj, error). If parsing fails, obj=None and error explains why.
    Accepts outputs with code fences, pre/post text, or minor JSON slop.
    """
    candidate = _strip_codefences(raw)
    candidate = _extract_json_segment(candidate)
    candidate = _soft_fixes(candidate)
    try:
        return json.loads(candidate), ""
    except json.JSONDecodeError as e:
        return None, f"JSON parse error: {e}"


class IterationCoder:
    def __init__(self, name: str, model):
        self.name = name
        self.model = model
    
    def code_response(self, question_key: str, question_data: Dict, is_question_43: bool = False):
        """Code a single question response"""
        if is_question_43:
            system_msg = system_prompt_43
            user_msg = f"{initial_prompt_43}\n\nQuestion: {question_data['Question']}\n\nStudent response: {question_data['Response']}"
        else:
            system_msg = system_prompt
            user_msg = f"{initial_prompt}\n\nQuestion: {question_data['Question']}\n\nStudent response: {question_data['Response']}"
        
        try:
            response = self.model.invoke([
                HumanMessage(content=f"System: {system_msg}\n\nUser: {user_msg}")
            ])
            return response.content
        except Exception as e:
            return f"Error: {str(e)}"

class Researcher:
    def __init__(self, model):
        self.model = model
    
    def validate_coding(self, question_key: str, question_data: Dict, iteration_codings: List[str], is_question_43: bool = False):
        """Validate the coding from all iterations"""
        validation_prompt = f"""As a senior researcher, you need to validate the coding of three iterations for the following question:

    Question: {question_data['Question']}
    Student Response: {question_data['Response']}

    Iteration 1 coding: {iteration_codings[0]}
    Iteration 2 coding: {iteration_codings[1]}
    Iteration 3 coding: {iteration_codings[2]}

    Please provide:
    1. A brief assessment of coding quality (1-5 scale)
    2. Any major discrepancies between iterations
    3. Recommendations for improvement

    Format your response as JSON:
    {{
        "quality_score": 1-5,
        "discrepancies": ["list of major differences"],
        "recommendations": ["list of suggestions"]
    }}"""
            
            try:
                response = self.model.invoke([HumanMessage(content=validation_prompt)])
                return response.content
            except Exception as e:
                return f"Error: {str(e)}"

class QuantitativeResearcher:
    def __init__(self):
        pass
    
    def calculate_metrics(self, all_results: List[Dict]):
        """Calculate various metrics from the coding results"""
        metrics = {
            "total_responses": len(all_results),
            "questions_coded": {},
            "iteration_agreement": {},
            "coding_quality": {},
            "common_themes": {},
            "actor_distribution": {"A": 0, "T": 0, "B": 0},
            "comparator_distribution": {"More": 0, "Less": 0, "Similar": 0}
        }
        
        # Process each response
        for result in all_results:
            response_id = result["ResponseId"]
            
            # Count questions coded
            for key, value in result.items():
                if key.startswith("Question ") and isinstance(value, dict) and "iteration_codings" in value:
                    question_num = key.split(" ")[1]
                    if question_num not in metrics["questions_coded"]:
                        metrics["questions_coded"][question_num] = 0
                    metrics["questions_coded"][question_num] += 1
            
                # Calculate iteration agreement for each question
            for key, value in result.items():
                if key.startswith("Question ") and isinstance(value, dict) and "iteration_codings" in value:
                    question_num = key.split(" ")[1]
                    if question_num not in metrics["iteration_agreement"]:
                        metrics["iteration_agreement"][question_num] = []
                    
                    # Simple agreement calculation (can be enhanced)
                    iteration_codes = value["iteration_codings"]
                    if len(iteration_codes) == 3:
                        # Count unique codes
                        unique_codes = set()
                        for iteration_code in iteration_codes:
                            if isinstance(iteration_code, str) and "codes" in iteration_code:
                                # Extract codes from JSON string (simplified)
                                unique_codes.add(iteration_code[:100])  # Simplified for demo
                        
                        agreement_score = len(unique_codes) / 3  # Higher = less agreement
                        metrics["iteration_agreement"][question_num].append(agreement_score)
        
        return metrics


def _ensure_writable_dir(p: Path) -> Path:
    try:
        p.mkdir(parents=True, exist_ok=True)
        return p
    except OSError as e:
        if e.errno in (EROFS, EACCES):
            # fallback to a guaranteed-writable temp/home location
            fallback = Path.cwd() / "workflow_fallback" / p.name
            fallback.mkdir(parents=True, exist_ok=True)
            print(f"[warn] '{p}' not writable ({e}). Using '{fallback}' instead.")
            return fallback
        raise


def process_survey_files_iterations_only(
    survey_dir: str = "temp_survey",
    output_dir: str = "first_coding",
    model_for_iterations=None  # if you want to inject/override
):
    """
    Iterations-only workflow:
    - Loads each survey JSON
    - For every 'Question N' with an Response, calls three IterationCoders
    - Saves per-response files and one combined file
    - Skips researcher validation and quantitative metrics
    """
    # Make sure dirs exist
    survey_dir = Path(survey_dir)
    output_dir = Path(output_dir)
    if output_dir.is_absolute():
        # prevent writing to /
        output_dir = Path.cwd() / output_dir.name
    else:
        output_dir = Path.cwd() / output_dir

    output_dir = _ensure_writable_dir(output_dir)

    # Init iterations
    iteration1 = IterationCoder("Iteration1", model_for_iterations or model_azure)
    iteration2 = IterationCoder("Iteration2", model_for_iterations or model_azure)
    iteration3 = IterationCoder("Iteration3", model_for_iterations or model_azure)

    # Collect survey files
    survey_files = [f for f in survey_dir.glob("*.json") if f.name != ".DS_Store"]
    all_results = []

    for survey_file in survey_files:
        print(f"Processing {survey_file.name}...")
        with open(survey_file, "r", encoding="utf-8") as f:
            survey_data = json.load(f)

        result = {"ResponseId": survey_data.get("ResponseId", "")}

        # Iterate questions
        for key, value in survey_data.items():
            if key.startswith("Question ") and isinstance(value, dict) and "Response" in value:
                question_num = key.split(" ")[1]
                is_question_43 = (question_num == "43")

                print(f"  Coding {key}...")
                e1_raw = iteration1.code_response(key, value, is_question_43)
                e2_raw = iteration2.code_response(key, value, is_question_43)
                e3_raw = iteration3.code_response(key, value, is_question_43)

                e1_obj, e1_err = parse_iteration_json(e1_raw)
                e2_obj, e2_err = parse_iteration_json(e2_raw)
                e3_obj, e3_err = parse_iteration_json(e3_raw)

                result[key] = {
                    "Question": value.get("Question", ""),
                    "Response": value.get("Response", ""),
                    "iteration_codings": {
                        "Iteration1": e1_obj if e1_obj is not None else {"_raw": e1_raw, "_error": e1_err},
                        "Iteration2": e2_obj if e2_obj is not None else {"_raw": e2_raw, "_error": e2_err},
                        "Iteration3": e3_obj if e3_obj is not None else {"_raw": e3_raw, "_error": e3_err},
                    }
                }

        # Save individual result
        out_file = output_dir / f"{result['ResponseId'] or survey_file.stem}_coded.json"
        with open(out_file, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2, ensure_ascii=False)
        all_results.append(result)
        print(f"  Saved to {out_file}")

    # Save combined results only (no metrics, no validation)
    combined_file = output_dir / "all_coded_responses.json"
    with open(combined_file, "w", encoding="utf-8") as f:
        json.dump(all_results, f, indent=2, ensure_ascii=False)

    print("\nWorkflow completed (iterations-only)!")
    print(f"Processed {len(survey_files)} survey files")
    print(f"Combined results saved to: {combined_file}")

    return all_results


In [163]:
process_survey_files_iterations_only()

Processing R_4C4j4KNUq9N3VGC.json...
  Coding Question 40...
  Coding Question 41...
  Coding Question 42...
  Saved to /Users/brayampineda/Library/CloudStorage/OneDrive-UTS/Research Project/workflow/first_coding/R_4C4j4KNUq9N3VGC_coded.json
Processing R_4CKOoHgU8hp01rg.json...
  Coding Question 40...
  Coding Question 41...
  Coding Question 42...
  Saved to /Users/brayampineda/Library/CloudStorage/OneDrive-UTS/Research Project/workflow/first_coding/R_4CKOoHgU8hp01rg_coded.json
Processing R_43e00PZDT3YYIZ2.json...
  Coding Question 43...
  Saved to /Users/brayampineda/Library/CloudStorage/OneDrive-UTS/Research Project/workflow/first_coding/R_43e00PZDT3YYIZ2_coded.json
Processing R_4P0PnRK3k4DjAxR.json...
  Coding Question 40...
  Coding Question 41...
  Coding Question 42...
  Saved to /Users/brayampineda/Library/CloudStorage/OneDrive-UTS/Research Project/workflow/first_coding/R_4P0PnRK3k4DjAxR_coded.json
Processing R_4Ka5vRl3NuBJ1Af.json...
  Coding Question 40...
  Coding Question 4

[{'ResponseId': 'R_4C4j4KNUq9N3VGC',
  'Question 40': {'Question': 'Please describe any differences in how it impacted your learning (comparing GenAI and your teacher)?',
   'Response': 'GenAI gives instant answers anytime. while a teacher offers personalized feedback and better interaction for deeper understanding.',
   'iteration_codings': {'Iteration1': {'codes': [{'actor': 'A',
       'comparator': 'More',
       'theme': 'Processes',
       'sub_theme': 'Access',
       'code': 'Speed',
       'reasoning': "The phrase 'gives instant answers anytime' directly maps to the Speed code, highlighting the immediacy and time-efficient nature of GenAI feedback compared to teachers."},
      {'actor': 'T',
       'comparator': 'More',
       'theme': 'Relational',
       'sub_theme': 'Relational',
       'code': 'Personal',
       'reasoning': "The phrase 'offers personalized feedback' explicitly indicates that teacher feedback is tailored to the individual, aligning with the Personal code 

## 4. Validation

In [85]:
# Quick access function
def quick_get_coding(response_id, question_key):
    """Quick access to coding data"""
    with open("first_coding/all_coded_responses.json", 'r') as f:
        data = json.load(f)
    
    for response in data:
        if response["ResponseId"] == response_id:
            if question_key in response:
                return response[question_key]
            else:
                print(f"Question {question_key} not found in {response_id}")
                return None
    print(f"Response ID {response_id} not found")
    return None

# Usage example:
coding = quick_get_coding("R_1VQ5hmwtdpRmnmu", "Question 40")

In [86]:
coding['iteration_codings']['Iteration3']['codes']

[{'actor': 'A',
  'comparator': 'More',
  'theme': 'Information',
  'sub_theme': 'Quality',
  'code': 'Specificity',
  'reasoning': "The phrase 'helpful for finding technical errors and finding understandable overviews of specific topics' indicates that GenAI provides detailed and specific information. This aligns with the Specificity code, which captures feedback that is precise and detailed."},
 {'actor': 'T',
  'comparator': 'More',
  'theme': 'Information',
  'sub_theme': 'Quality',
  'code': 'In-depth',
  'reasoning': "The statement 'search for more implicit reasonings when providing feedback' suggests that teachers focus on deeper, more nuanced aspects of reasoning. This aligns with the In-depth code, which captures feedback that is thorough and nuanced."}]

In [108]:
import json
import os
import pandas as pd
from typing import Dict, List, Tuple, Optional

def compare_human_model_codings(all_coded_responses_path: str, temp_survey_folder: str) -> pd.DataFrame:
    """
    Compare human-coded responses with model-coded responses.
    
    Args:
        all_coded_responses_path: Path to the all_coded_responses.json file
        temp_survey_folder: Path to the temp_survey folder containing human-coded files
    
    Returns:
        DataFrame with comparison of human vs model codings
    """
    
    # Load the model responses
    with open(all_coded_responses_path, 'r', encoding='utf-8') as f:
        model_responses = json.load(f)
    
    # Create a dictionary for quick lookup
    model_lookup = {resp['ResponseId']: resp for resp in model_responses}
    
    # Get list of human-coded files
    human_files = [f for f in os.listdir(temp_survey_folder) if f.endswith('.json')]
    
    comparison_data = []
    
    for human_file in human_files:
        response_id = human_file.replace('.json', '')
        
        # Skip if this response ID is not in the model responses
        if response_id not in model_lookup:
            continue
            
        # Load human-coded response
        human_file_path = os.path.join(temp_survey_folder, human_file)
        with open(human_file_path, 'r', encoding='utf-8') as f:
            human_response = json.load(f)
        
        model_response = model_lookup[response_id]
        
        # Process each question
        for question_key in human_response:
            if not question_key.startswith('Question'):
                continue
                
            # Skip if question doesn't exist in model response
            if question_key not in model_response:
                continue
            
            human_codes = human_response[question_key].get('Codes', [])
            model_iteration_codings = model_response[question_key].get('iteration_codings', {})
            
            # Format human codes
            human_actor_comparator = []
            human_characteristics = []
            for code in human_codes:
                if code.get('Actor', '') not in ['nil', 'Nil', '']:
                    human_actor_comparator.append(f"{code.get('Actor', '')}-{code.get('Comparator', '').lower()}")
                    human_characteristics.append(code.get('Characteristic', ''))
            
            # Format model codes for each iteration
            iteration_data = {}
            for iteration_name in ['Iteration1', 'Iteration2', 'Iteration3']:
                iteration_codes = model_iteration_codings.get(iteration_name, {}).get('codes', [])
                
                actor_comparator = []
                characteristics = []
                themes = []
                sub_themes = []
                reasonings = []
                
                for code in iteration_codes:
                    actor_comparator.append(f"{code.get('actor', '')}-{code.get('comparator', '').lower()}")
                    characteristics.append(code.get('code', ''))
                    themes.append(code.get('theme', ''))
                    sub_themes.append(code.get('sub_theme', ''))
                    reasonings.append(code.get('reasoning', ''))
                
                iteration_data[iteration_name] = {
                    'actor_comparator': actor_comparator,
                    'characteristics': characteristics,
                    'themes': themes,
                    'sub_themes': sub_themes,
                    'reasonings': reasonings
                }
            
            # Create comparison row
            comparison_data.append({
                'ResponseId': response_id,
                'Question': question_key,
                'Question_Text': model_response[question_key].get('Question', ''),
                'Response_Text': model_response[question_key].get('Response', ''),
                'Human_Actor_Comparator': human_actor_comparator,
                'Human_Characteristics': human_characteristics,
                **{f"{iter_name}_Actor_Comparator": data['actor_comparator'] for iter_name, data in iteration_data.items()},
                **{f"{iter_name}_Characteristics": data['characteristics'] for iter_name, data in iteration_data.items()},
                **{f"{iter_name}_Themes": data['themes'] for iter_name, data in iteration_data.items()},
                **{f"{iter_name}_SubThemes": data['sub_themes'] for iter_name, data in iteration_data.items()},
                **{f"{iter_name}_Reasonings": data['reasonings'] for iter_name, data in iteration_data.items()}
            })
    
    # Create DataFrame
    df = pd.DataFrame(comparison_data)
    
    # Reorder columns for better readability
    if not df.empty:
        base_cols = ['ResponseId', 'Question', 'Question_Text', 'Response_Text', 
                    'Human_Actor_Comparator', 'Human_Characteristics']
        iteration_cols = []
        for iteration in ['Iteration1', 'Iteration2', 'Iteration3']:
            iteration_cols.extend([
                f'{iteration}_Actor_Comparator',
                f'{iteration}_Characteristics',
                f'{iteration}_Themes',
                f'{iteration}_SubThemes',
                f'{iteration}_Reasonings'
            ])
        
        df = df[base_cols + iteration_cols]
    
    return df

def get_question_summary(comparison_df: pd.DataFrame, question: str) -> pd.DataFrame:
    """
    Get summary for a specific question.
    
    Args:
        comparison_df: DataFrame from compare_human_model_codings
        question: Question key (e.g., 'Question 40')
    
    Returns:
        Filtered DataFrame for the specific question
    """
    return comparison_df[comparison_df['Question'] == question]

def get_actor_comparator_summary(comparison_df: pd.DataFrame) -> pd.DataFrame:
    """
    Get summary of Actor-Comparator patterns.
    
    Args:
        comparison_df: DataFrame from compare_human_model_codings
    
    Returns:
        Summary DataFrame with Actor-Comparator counts
    """
    summary_data = []
    
    # Human coding summary
    human_summary = comparison_df.groupby(['Human_Actor', 'Human_Comparator']).size().reset_index(name='Human_Count')
    summary_data.append(human_summary)
    
    # Model coding summary for each iteration
    for iteration in ['Iteration1', 'Iteration2', 'Iteration3']:
        actor_col = f'{iteration}_Actor'
        comparator_col = f'{iteration}_Comparator'
        
        if actor_col in comparison_df.columns:
            iteration_summary = comparison_df.groupby([actor_col, comparator_col]).size().reset_index(name=f'{iteration}_Count')
            summary_data.append(iteration_summary)
    
    # Merge all summaries
    if summary_data:
        final_summary = summary_data[0]
        for summary in summary_data[1:]:
            final_summary = final_summary.merge(summary, on=['Human_Actor', 'Human_Comparator'], how='outer')
        
        # Fill NaN values with 0
        final_summary = final_summary.fillna(0)
        
        return final_summary
    
    return pd.DataFrame()



In [109]:
# Example usage:
comparison_df = compare_human_model_codings(
    'first_coding/all_coded_responses.json',
    'temp_survey'
)

# # Get summary for specific question
# question_40_summary = get_question_summary(comparison_df, 'Question 40')

# # Get Actor-Comparator summary
# actor_comparator_summary = get_actor_comparator_summary(comparison_df)

In [110]:
comparison_df.sample(3)

Unnamed: 0,ResponseId,Question,Question_Text,Response_Text,Human_Actor_Comparator,Human_Characteristics,Iteration1_Actor_Comparator,Iteration1_Characteristics,Iteration1_Themes,Iteration1_SubThemes,...,Iteration2_Actor_Comparator,Iteration2_Characteristics,Iteration2_Themes,Iteration2_SubThemes,Iteration2_Reasonings,Iteration3_Actor_Comparator,Iteration3_Characteristics,Iteration3_Themes,Iteration3_SubThemes,Iteration3_Reasonings
16,R_1gTNWbmwxtRxrf8,Question 40,Please describe any differences in how it impa...,"between AI and a teacher, the teacher is alway...","[T-more, A-more]","[Reliable, Understandable]","[T-more, A-more, A-more]","[Reliable, Understandable, Utility]","[Information, Information, Information]","[Quality, Quality, Quality]",...,"[T-more, A-more, A-more]","[Reliable, Understandable, Utility]","[Information, Information, Information]","[Quality, Quality, Quality]",[The phrase 'the teacher is always going to be...,"[T-more, A-more, A-more]","[Reliable, Understandable, Volume]","[Information, Information, Processes]","[Quality, Quality, Access]",[The phrase 'the teacher is always going to be...
6,R_43e00PZDT3YYIZ2,Question 43,Why didn’t you use GenAI for feedback on your ...,I didn't know how / if gen AI would know about...,[],[],[],[],[],[],...,[],[],[],[],[],[],[],[],[],[]
18,R_1gTNWbmwxtRxrf8,Question 42,Describe any other differences between the fee...,nil,[],[],[],[],[],[],...,[],[],[],[],[],[],[],[],[],[]


In [111]:
comparison_df.shape

(24, 21)

In [112]:
# Export comparison results to CSV
# Convert list values to strings before exporting
comparison_df.map(lambda x: str(x) if isinstance(x, list) else x).to_csv('comparison_results.csv', index=False)

In [164]:
import json
import os
import pandas as pd
from typing import Dict, List, Tuple, Optional

def compare_human_model_codings(all_coded_responses_path: str, temp_survey_folder: str) -> pd.DataFrame:
    """
    Compare human-coded responses with model-coded responses.
    
    Args:
        all_coded_responses_path: Path to the all_coded_responses.json file
        temp_survey_folder: Path to the temp_survey folder containing human-coded files
    
    Returns:
        DataFrame with comparison of human vs model codings
    """
    
    # Load the model responses
    with open(all_coded_responses_path, 'r', encoding='utf-8') as f:
        model_responses = json.load(f)
    
    # Create a dictionary for quick lookup
    model_lookup = {resp['ResponseId']: resp for resp in model_responses}
    
    # Get list of human-coded files
    human_files = [f for f in os.listdir(temp_survey_folder) if f.endswith('.json')]
    
    comparison_data = []
    
    for human_file in human_files:
        response_id = human_file.replace('.json', '')
        
        # Skip if this response ID is not in the model responses
        if response_id not in model_lookup:
            continue
            
        # Load human-coded response
        human_file_path = os.path.join(temp_survey_folder, human_file)
        with open(human_file_path, 'r', encoding='utf-8') as f:
            human_response = json.load(f)
        
        model_response = model_lookup[response_id]
        
        # Process each question
        for question_key in human_response:
            if not question_key.startswith('Question'):
                continue
                
            # Skip if question doesn't exist in model response
            if question_key not in model_response:
                continue
            
            # Handle different structures for different questions
            if question_key == "Question 43":
                # For Question 43: Human has "Reasons" list, Model has "iteration_codings" with "reasons"
                human_codes = human_response[question_key].get('Reasons', [])
                model_iteration_codings = model_response[question_key].get('iteration_codings', {})
                
                # Extract human coding information as lists (for Question 43, these are just reasons)
                human_characteristics = human_codes if isinstance(human_codes, list) else []
                human_actor_comparators = []  # Not applicable for Question 43
                
                # Process model codes for each iteration
                iteration_data_dict = {}
                for iteration_name, iteration_data in model_iteration_codings.items():
                    iteration_reasons = iteration_data.get('reasons', [])
                    
                    iteration_themes = []
                    iteration_subthemes = []
                    iteration_codes_list = []
                    iteration_reasonings = []
                    
                    for model_reason in iteration_reasons:
                        theme = model_reason.get('theme', '')
                        sub_theme = model_reason.get('sub_theme', '')
                        code = model_reason.get('code', '')
                        reasoning = model_reason.get('reasoning', '')
                        
                        iteration_themes.append(theme)
                        iteration_subthemes.append(sub_theme)
                        iteration_codes_list.append(code)
                        iteration_reasonings.append(reasoning)
                    
                    iteration_data_dict[iteration_name] = {
                        'themes': iteration_themes,
                        'subthemes': iteration_subthemes,
                        'codes': iteration_codes_list,
                        'reasonings': iteration_reasonings
                    }
                
                # Create comparison row for Question 43
                comparison_data.append({
                    'ResponseId': response_id,
                    'Question': question_key,
                    'Human_Actor_Comparator': human_actor_comparators,
                    'Human_Characteristics': human_characteristics,
                    'Iteration1_Actor_Comparator': [],
                    'Iteration1_Characteristics': iteration_data_dict.get('Iteration1', {}).get('codes', []),
                    'Iteration1_Themes': iteration_data_dict.get('Iteration1', {}).get('themes', []),
                    'Iteration1_SubThemes': iteration_data_dict.get('Iteration1', {}).get('subthemes', []),
                    'Iteration1_Reasonings': iteration_data_dict.get('Iteration1', {}).get('reasonings', []),
                    'Iteration2_Actor_Comparator': [],
                    'Iteration2_Characteristics': iteration_data_dict.get('Iteration2', {}).get('codes', []),
                    'Iteration2_Themes': iteration_data_dict.get('Iteration2', {}).get('themes', []),
                    'Iteration2_SubThemes': iteration_data_dict.get('Iteration2', {}).get('subthemes', []),
                    'Iteration2_Reasonings': iteration_data_dict.get('Iteration2', {}).get('reasonings', []),
                    'Iteration3_Actor_Comparator': [],
                    'Iteration3_Characteristics': iteration_data_dict.get('Iteration3', {}).get('codes', []),
                    'Iteration3_Themes': iteration_data_dict.get('Iteration3', {}).get('themes', []),
                    'Iteration3_SubThemes': iteration_data_dict.get('Iteration3', {}).get('subthemes', []),
                    'Iteration3_Reasonings': iteration_data_dict.get('Iteration3', {}).get('reasonings', [])
                })
                
            else:
                # For Questions 40, 41, 42: Original structure with Actor-Comparator-Characteristic
                human_codes = human_response[question_key].get('Codes', [])
                model_iteration_codings = model_response[question_key].get('iteration_codings', {})
                
                # Extract human coding information as lists
                human_actors = []
                human_comparators = []
                human_characteristics = []
                human_actor_comparators = []
                
                for human_code in human_codes:
                    actor = human_code.get('Actor', '')
                    comparator = human_code.get('Comparator', '')
                    characteristic = human_code.get('Characteristic', '')
                    
                    # Skip if no meaningful coding
                    if actor in ['nil', 'Nil', '']:
                        continue
                    
                    human_actors.append(actor)
                    human_comparators.append(comparator)
                    human_characteristics.append(characteristic)
                    human_actor_comparators.append(f"{actor}-{comparator}")
                
                # Process model codes for each iteration
                iteration_data_dict = {}
                for iteration_name, iteration_data in model_iteration_codings.items():
                    iteration_codes = iteration_data.get('codes', [])
                    
                    iteration_actors = []
                    iteration_comparators = []
                    iteration_themes = []
                    iteration_subthemes = []
                    iteration_codes_list = []
                    iteration_reasonings = []
                    iteration_actor_comparators = []
                    
                    for model_code in iteration_codes:
                        actor = model_code.get('actor', '')
                        comparator = model_code.get('comparator', '')
                        theme = model_code.get('theme', '')
                        sub_theme = model_code.get('sub_theme', '')
                        code = model_code.get('code', '')
                        reasoning = model_code.get('reasoning', '')
                        
                        iteration_actors.append(actor)
                        iteration_comparators.append(comparator)
                        iteration_themes.append(theme)
                        iteration_subthemes.append(sub_theme)
                        iteration_codes_list.append(code)
                        iteration_reasonings.append(reasoning)
                        iteration_actor_comparators.append(f"{actor}-{comparator}")
                    
                    iteration_data_dict[iteration_name] = {
                        'actors': iteration_actors,
                        'comparators': iteration_comparators,
                        'themes': iteration_themes,
                        'subthemes': iteration_subthemes,
                        'codes': iteration_codes_list,
                        'reasonings': iteration_reasonings,
                        'actor_comparators': iteration_actor_comparators
                    }
                
                # Create comparison row for Questions 40-42
                comparison_data.append({
                    'ResponseId': response_id,
                    'Question': question_key,
                    'Human_Actor_Comparator': human_actor_comparators,
                    'Human_Characteristics': human_characteristics,
                    'Iteration1_Actor_Comparator': iteration_data_dict.get('Iteration1', {}).get('actor_comparators', []),
                    'Iteration1_Characteristics': iteration_data_dict.get('Iteration1', {}).get('codes', []),
                    'Iteration1_Themes': iteration_data_dict.get('Iteration1', {}).get('themes', []),
                    'Iteration1_SubThemes': iteration_data_dict.get('Iteration1', {}).get('subthemes', []),
                    'Iteration1_Reasonings': iteration_data_dict.get('Iteration1', {}).get('reasonings', []),
                    'Iteration2_Actor_Comparator': iteration_data_dict.get('Iteration2', {}).get('actor_comparators', []),
                    'Iteration2_Characteristics': iteration_data_dict.get('Iteration2', {}).get('codes', []),
                    'Iteration2_Themes': iteration_data_dict.get('Iteration2', {}).get('themes', []),
                    'Iteration2_SubThemes': iteration_data_dict.get('Iteration2', {}).get('subthemes', []),
                    'Iteration2_Reasonings': iteration_data_dict.get('Iteration2', {}).get('reasonings', []),
                    'Iteration3_Actor_Comparator': iteration_data_dict.get('Iteration3', {}).get('actor_comparators', []),
                    'Iteration3_Characteristics': iteration_data_dict.get('Iteration3', {}).get('codes', []),
                    'Iteration3_Themes': iteration_data_dict.get('Iteration3', {}).get('themes', []),
                    'Iteration3_SubThemes': iteration_data_dict.get('Iteration3', {}).get('subthemes', []),
                    'Iteration3_Reasonings': iteration_data_dict.get('Iteration3', {}).get('reasonings', [])
                })
    
    # Create DataFrame
    df = pd.DataFrame(comparison_data)
    
    return df

def get_question_summary(comparison_df: pd.DataFrame, question: str) -> pd.DataFrame:
    """
    Get summary for a specific question.
    
    Args:
        comparison_df: DataFrame from compare_human_model_codings
        question: Question key (e.g., 'Question 40')
    
    Returns:
        Filtered DataFrame for the specific question
    """
    return comparison_df[comparison_df['Question'] == question]

def get_actor_comparator_summary(comparison_df: pd.DataFrame) -> pd.DataFrame:
    """
    Get summary of Actor-Comparator patterns.
    
    Args:
        comparison_df: DataFrame from compare_human_model_codings
    
    Returns:
        Summary DataFrame with Actor-Comparator counts
    """
    summary_data = []
    
    # Human coding summary
    human_actor_comparators = []
    for actor_comparators in comparison_df['Human_Actor_Comparator']:
        human_actor_comparators.extend(actor_comparators)
    
    human_summary = pd.Series(human_actor_comparators).value_counts().reset_index()
    human_summary.columns = ['Actor_Comparator', 'Human_Count']
    summary_data.append(human_summary)
    
    # Model coding summary for each iteration
    for iteration in ['Iteration1', 'Iteration2', 'Iteration3']:
        actor_comparator_col = f'{iteration}_Actor_Comparator'
        
        if actor_comparator_col in comparison_df.columns:
            iteration_actor_comparators = []
            for actor_comparators in comparison_df[actor_comparator_col]:
                iteration_actor_comparators.extend(actor_comparators)
            
            iteration_summary = pd.Series(iteration_actor_comparators).value_counts().reset_index()
            iteration_summary.columns = ['Actor_Comparator', f'{iteration}_Count']
            summary_data.append(iteration_summary)
    
    # Merge all summaries
    if summary_data:
        final_summary = summary_data[0]
        for summary in summary_data[1:]:
            final_summary = final_summary.merge(summary, on='Actor_Comparator', how='outer')
        
        # Fill NaN values with 0
        final_summary = final_summary.fillna(0)
        
        return final_summary
    
    return pd.DataFrame()

def get_characteristics_summary(comparison_df: pd.DataFrame) -> pd.DataFrame:
    """
    Get summary of characteristics/codes used.
    
    Args:
        comparison_df: DataFrame from compare_human_model_codings
    
    Returns:
        Summary DataFrame with characteristics counts
    """
    summary_data = []
    
    # Human characteristics summary
    human_characteristics = []
    for characteristics in comparison_df['Human_Characteristics']:
        human_characteristics.extend(characteristics)
    
    human_summary = pd.Series(human_characteristics).value_counts().reset_index()
    human_summary.columns = ['Characteristic', 'Human_Count']
    summary_data.append(human_summary)
    
    # Model characteristics summary for each iteration
    for iteration in ['Iteration1', 'Iteration2', 'Iteration3']:
        characteristics_col = f'{iteration}_Characteristics'
        
        if characteristics_col in comparison_df.columns:
            iteration_characteristics = []
            for characteristics in comparison_df[characteristics_col]:
                iteration_characteristics.extend(characteristics)
            
            iteration_summary = pd.Series(iteration_characteristics).value_counts().reset_index()
            iteration_summary.columns = ['Characteristic', f'{iteration}_Count']
            summary_data.append(iteration_summary)
    
    # Merge all summaries
    if summary_data:
        final_summary = summary_data[0]
        for summary in summary_data[1:]:
            final_summary = final_summary.merge(summary, on='Characteristic', how='outer')
        
        # Fill NaN values with 0
        final_summary = final_summary.fillna(0)
        
        return final_summary
    
    return pd.DataFrame()



In [169]:
# Example usage:
comparison_df = compare_human_model_codings(
    'first_coding/all_coded_responses.json',
    'temp_survey'
)

# # Get summary for specific question
# question_40_summary = get_question_summary(comparison_df, 'Question 40')

# # Get Actor-Comparator summary
# actor_comparator_summary = get_actor_comparator_summary(comparison_df)

# # Get Characteristics summary
# characteristics_summary = get_characteristics_summary(comparison_df)

In [170]:
comparison_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 19 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   ResponseId                   24 non-null     object
 1   Question                     24 non-null     object
 2   Human_Actor_Comparator       24 non-null     object
 3   Human_Characteristics        24 non-null     object
 4   Iteration1_Actor_Comparator  24 non-null     object
 5   Iteration1_Characteristics   24 non-null     object
 6   Iteration1_Themes            24 non-null     object
 7   Iteration1_SubThemes         24 non-null     object
 8   Iteration1_Reasonings        24 non-null     object
 9   Iteration2_Actor_Comparator  24 non-null     object
 10  Iteration2_Characteristics   24 non-null     object
 11  Iteration2_Themes            24 non-null     object
 12  Iteration2_SubThemes         24 non-null     object
 13  Iteration2_Reasonings        24 non-n

In [171]:
comparison_df['Question'].unique()

array(['Question 40', 'Question 41', 'Question 42', 'Question 43'],
      dtype=object)

In [172]:
import pandas as pd
import ast
from itertools import combinations

def safe_eval_list(x):
    """
    Ensures that string values like "['a','b']" 
    are converted into Python lists.
    """
    if isinstance(x, list):
        return x
    if isinstance(x, str):
        try:
            return ast.literal_eval(x)
        except:
            return [x]
    return []

def jaccard_similarity(list1, list2):
    set1, set2 = set(list1), set(list2)
    if not set1 and not set2:
        return 1.0  # both empty → perfect agreement
    return len(set1 & set2) / len(set1 | set2)

def compute_agreement(df, question_col="Question"):
    iteration_cols = [
        "Iteration1_Characteristics",
        "Iteration2_Characteristics",
        "Iteration3_Characteristics"
    ]
    
    # Ensure lists
    for col in iteration_cols:
        df[col] = df[col].apply(safe_eval_list)
    
    results = []

    for q, group in df.groupby(question_col):
        pair_scores = []
        for _, row in group.iterrows():
            for col1, col2 in combinations(iteration_cols, 2):
                score = jaccard_similarity(row[col1], row[col2])
                pair_scores.append(score)
        avg_score = sum(pair_scores) / len(pair_scores) if pair_scores else 0
        results.append({"Question": q, "Agreement": avg_score})
    
    return pd.DataFrame(results)

# Example usage
agreement_df = compute_agreement(comparison_df)
print(agreement_df)


      Question  Agreement
0  Question 40   0.827249
1  Question 41   0.961905
2  Question 42   0.892857
3  Question 43   0.888889


In [None]:
import pandas as pd
import ast

def safe_eval_list(x):
    """Converts stringified lists into Python lists safely."""
    if isinstance(x, list):
        return x
    if isinstance(x, str):
        try:
            return ast.literal_eval(x)
        except:
            return [x]
    return []

def jaccard_similarity(list1, list2):
    set1, set2 = set(list1), set(list2)
    if not set1 and not set2:
        return 1.0  # both empty = perfect agreement
    return len(set1 & set2) / len(set1 | set2)

def compute_human_iteration_agreement(df, question_col="Question"):
    human_col = "Human_Characteristics"
    iteration_cols = [
        "Iteration1_Characteristics",
        "Iteration2_Characteristics",
        "Iteration3_Characteristics"
    ]
    
    # Ensure all are lists
    df[human_col] = df[human_col].apply(safe_eval_list)
    for col in iteration_cols:
        df[col] = df[col].apply(safe_eval_list)
    
    results = []
    
    for q, group in df.groupby(question_col):
        for iteration_col in iteration_cols:
            scores = []
            for _, row in group.iterrows():
                score = jaccard_similarity(row[human_col], row[iteration_col])
                scores.append(score)
            avg_score = sum(scores) / len(scores) if scores else 0
            results.append({
                "Question": q,
                "Iteration": iteration_col.replace("_Characteristics", ""),
                "Agreement_with_Human": avg_score
            })
    
    return pd.DataFrame(results)

# Example usage
agreement_human_df = compute_human_iteration_agreement(comparison_df)
print(agreement_human_df)


       Question   Iteration  Agreement_with_Human
0   Question 40  Iteration1              0.505102
1   Question 40  Iteration2              0.433673
2   Question 40  Iteration3              0.433673
3   Question 41  Iteration1              0.535714
4   Question 41  Iteration2              0.492857
5   Question 41  Iteration3              0.492857
6   Question 42  Iteration1              0.586905
7   Question 42  Iteration2              0.586905
8   Question 42  Iteration3              0.604762
9   Question 43  Iteration1              1.000000
10  Question 43  Iteration2              1.000000
11  Question 43  Iteration3              0.833333


In [154]:
comparison_df.sample(1).values

array([['R_4C4j4KNUq9N3VGC', 'Question 41', list(['A-more', 'T-more']),
        list(['No impact', 'Positive']),
        list(['A-More', 'A-More', 'T-More', 'T-More']),
        list(['No impact', 'Less effort', 'Personal', 'Positive']),
        list(['Feeling', 'Processes', 'Relational', 'Feeling']),
        list(['Feeling', 'Effort', 'Relational', 'Feeling']),
        list(["The phrase 'felt more neutral' indicates an emotionally indifferent response to GenAI feedback. The No impact code captures feedback that does not evoke significant positive or negative emotions, which aligns with the student's description of GenAI feedback.", "The term 'efficient' suggests that GenAI feedback required less effort or cognitive load to process. The Less effort code captures reduced work or energy demands, which matches the student's description of GenAI feedback.", "The phrase 'more personal' directly indicates a relational quality of teacher feedback. The Personal code captures feedback that demon