# In [2]:
import os
import yaml
import json
import warnings
import asyncio
from datetime import datetime
from typing import Literal, Dict, List, Any, Union, Optional
from pydantic import BaseModel, ValidationError
from crewai import Agent, Task, Crew, Process
from crewai.tasks.task_output import OutputFormat
from crewai.knowledge.source.crew_docling_source import CrewDoclingSource

import requests
import zipfile
import io
import time
import pandas as pd
import boto3
from dotenv import load_dotenv
import logging
import re

# Configure logging: the listed third-party loggers only report errors.
for _logger_name in ("opentelemetry", "urllib3"):
    logging.getLogger(_logger_name).setLevel(logging.ERROR)
del _logger_name  # keep the loop variable out of the module namespace
# Set OTEL_TRACES_EXPORTER to "none" for this process.
os.environ.update(OTEL_TRACES_EXPORTER="none")

# ========== Pydantic Models ==========
class ChoiceOption(BaseModel):
    """A single option of a choice question (see ChoiceConfig.options)."""
    text: str   # option text
    value: str  # option value; convert_to_question_format stores the option's index as a string here

class ChoiceConfig(BaseModel):
    """Input configuration holding the list of options of a choice question."""
    options: List[ChoiceOption]  # the available options, in order

class SliderConfig(BaseModel):
    """Input configuration of a slider question: numeric range and step."""
    min: float   # lower end of the slider
    max: float   # upper end of the slider
    step: float  # increment between two slider positions

class TextInputConfig(BaseModel):
    """Input configuration of a free-text question; every field has a default."""
    placeholder: Optional[str] = None  # optional placeholder text
    multiline: bool = False            # multi-line flag, off by default

class Question(BaseModel):
    """One survey question: identifier, text, input type and its configuration."""
    question_id: str    # identifier; convert_to_question_format generates "q1", "q2", ...
    question_text: str  # the question wording
    # Only these four input types are accepted.
    input_type: Literal["multiple_choice", "single_choice", "slider", "text_input"]
    # Configuration object for the input; nothing here ties the config class
    # to input_type, so a mismatched pair is not rejected by this model.
    input_config: Union[ChoiceConfig, SliderConfig, TextInputConfig]

class Survey(BaseModel):
    """A survey: theme, purpose and an ordered list of questions."""
    theme: str    # survey theme; survey_dict_to_qualtrics_payload uses it as the survey name
    purpose: str  # stated purpose of the survey
    questions: List[Question]  # the survey's questions

class QuestionComment(BaseModel):
    """A comment attached to one question, referenced by its question_id."""
    question_id: str  # id of the commented question
    comment: str      # the comment text

class AnnotatedSurvey(BaseModel):
    """A survey together with per-question comments and an optional overall comment."""
    survey: Survey  # the survey being commented on
    question_comments: List[QuestionComment]  # comments keyed by question_id
    # Explicit default: without it Pydantic v2 treats an Optional field as
    # required (it may be None but must be present). With the default the
    # field can be omitted, matching TextInputConfig.placeholder.
    overall_comment: Optional[str] = None

class SurveyImprovementResult(BaseModel):
    """Pairs the annotated original survey with its revised version."""
    original_with_comments: AnnotatedSurvey  # original survey plus comments
    revised_survey: Survey  # revised survey; survey_dict_to_qualtrics_payload reads the "revised_survey" key

# ========== Utility to load YAML ==========
def load_yaml(path: str) -> dict:
    """Open ``path`` as UTF-8 text and return what ``yaml.safe_load`` parses from it."""
    with open(path, mode='r', encoding='utf-8') as stream:
        parsed = yaml.safe_load(stream)
    return parsed

# ========== Agent Definitions ==========
def load_agents():
    """Build the two CrewAI agents from their YAML configuration files.

    Returns:
        Tuple ``(convert_agent, editor_agent)``.
    """
    # (YAML file, top-level key inside the file, agent name), in build order.
    agent_specs = (
        ("config/agents/survey_convert_agent.yaml", "survey_convert_agent", "survey_convert_agent"),
        ("config/agents/survey_editor.yaml", "survey_editor", "survey_editor_agent"),
    )

    built = []
    for yaml_path, section, agent_name in agent_specs:
        settings = load_yaml(yaml_path)[section]
        built.append(
            Agent(
                name=agent_name,
                role=settings["role"],
                goal=settings["goal"],
                backstory=settings["backstory"],
                verbose=settings["verbose"],
                allow_delegation=settings["allow_delegation"],
            )
        )

    convert_agent, editor_agent = built
    return convert_agent, editor_agent

# ========== Task Definitions ==========
def load_tasks(convert_agent, editor_agent):
    """Build the four CrewAI tasks from their YAML configuration files.

    Args:
        convert_agent: Agent assigned to the convert and research tasks.
        editor_agent: Agent assigned to the comment and improve tasks.

    Returns:
        Tuple ``(convert_task, research_task, comment_task, improve_task)``.
    """
    def _double_braces(text):
        # Turn every "{" into "{{" and every "}" into "}}".
        return text.replace("{", "{{").replace("}", "}}")

    # ---- Convert task ----
    convert_cfg = load_yaml("config/tasks/convert_survey_to_json.yaml")["convert_survey_to_json"]
    convert_task = Task(
        name="convert_survey_to_json",
        description=convert_cfg["description"],
        agent=convert_agent,
        tool=convert_cfg.get("tool"),
        expected_output=convert_cfg["expected_output"],
        output_format=OutputFormat.JSON,
    )

    # ---- Research task: placeholders are filled in by hand ----
    research_cfg = load_yaml("config/tasks/apply_survey_enhancements.yaml")["research_task"]
    topic_text = "the survey topic"
    research_description = research_cfg["description"].replace("{topic}", topic_text)
    research_description = research_description.replace("{current_year}", str(datetime.now().year))
    research_expected = research_cfg["expected_output"].replace("{topic}", topic_text)
    research_task = Task(
        name="research_task",
        description=research_description,
        agent=convert_agent,
        expected_output=research_expected,
        output_format=OutputFormat.JSON,
    )

    # ---- Comment task ----
    comment_cfg = load_yaml("config/tasks/comment_survey_task.yaml")["comment_survey"]
    comment_task = Task(
        name="comment_survey",
        description=comment_cfg["description"],
        agent=editor_agent,
        expected_output=comment_cfg["expected_output"],
        output_format=OutputFormat.JSON,
    )

    # ---- Improve task: braces of the JSON schema example are doubled ----
    improve_cfg = load_yaml("config/tasks/apply_survey_enhancements.yaml")["improve_survey"]
    improve_description = _double_braces(improve_cfg["description"])
    improve_expected = _double_braces(improve_cfg["expected_output"])
    improve_task = Task(
        name="improve_survey",
        description=improve_description,
        agent=editor_agent,
        expected_output=improve_expected,
        output_format=OutputFormat.JSON,
    )

    return convert_task, research_task, comment_task, improve_task

def survey_dict_to_qualtrics_payload(survey_dict: dict) -> dict:
    """
    Convert a custom survey dict to a Qualtrics v3 API survey-definitions payload.

    Supports question types: multiple_choice, single_choice, slider, text_input.

    Args:
        survey_dict: Dict with a "revised_survey" entry that holds an optional
            "theme" and a "questions" list. Every question provides
            "question_id", "question_text", "input_type" and "input_config".
            Choice options may be ChoiceOption-shaped dicts
            ({"text": ..., "value": ...}) or plain strings, optionally written
            as "<id>=<label>".

    Returns:
        Dict with "SurveyName", "Language", "ProjectCategory" and a "Questions"
        mapping keyed by "QID<digits of question_id>".

    Raises:
        ValueError: If a question has an unsupported input_type.
    """
    survey_meta = survey_dict["revised_survey"]
    payload = {
        "SurveyName":      survey_meta.get("theme", "New Survey"),
        "Language":        "EN",
        "ProjectCategory": "CORE",
        "Questions":       {}
    }

    for q in survey_meta["questions"]:
        raw_id = q["question_id"]                  # e.g. "q4"
        num    = re.sub(r'\D+', '', str(raw_id))   # keep only the digits: "4"
        qid    = f"QID{num}"                       # assemble "QID4"

        qt  = q["question_text"]
        it  = q["input_type"]
        cfg = q["input_config"]

        # ---- Common fields ----
        qobj = {
            "QuestionText":      qt,
            "DataExportTag":     qid,
            "Configuration":     {"QuestionDescriptionOption": "UseText"},
            "Validation":        {"Settings": {"ForceResponse": "OFF", "Type": "None"}}
        }

        # ---- Multiple/Single choice questions ----
        if it in ("multiple_choice", "single_choice"):
            choices = {}
            for opt in cfg.get("options", []):
                if isinstance(opt, dict):
                    # ChoiceOption-shaped option. Choices are numbered by
                    # position; "value" is not used as the key because
                    # convert_to_question_format starts values at "0".
                    idx = str(len(choices) + 1)
                    txt = str(opt.get("text", "")).strip()
                else:
                    label = str(opt)
                    if "=" in label:
                        # "<id>=<label>" form: the part before "=" is the key.
                        idx, txt = label.split("=", 1)
                        idx, txt = idx.strip(), txt.strip()
                    else:
                        idx = str(len(choices) + 1)
                        txt = label.strip()
                choices[idx] = {"Display": txt}

            qobj.update({
                "QuestionType": "MC",
                # MAVR = multiple answer vertical, SAVR = single answer vertical.
                "Selector":     "MAVR" if it == "multiple_choice" else "SAVR",
                "SubSelector":  "TX",
                "Choices":      choices
            })

        # ---- Slider questions ----
        elif it == "slider":
            # Safely read slider parameters (default 0-100, step 1)
            start = cfg.get("min", cfg.get("start", 0))
            end   = cfg.get("max", cfg.get("end", 100))
            step  = cfg.get("step", cfg.get("stepSize", 1))

            # NOTE(review): the slider type/selector codes below are kept from
            # the original code; confirm them against the Qualtrics API docs.
            qobj.update({
                "QuestionType": "SL",
                "Selector":     "Slider",
                "SubSelector":  "SL",
                "SliderStart":  start,
                "SliderEnd":    end,
                "SliderStep":   step
            })
        # ---- Text input questions ----
        elif it == "text_input":
            qobj.update({
                "QuestionType": "TE",
                "Selector":     "ML"   # Text Entry must use ML
            })
        else:
            raise ValueError(f"Unsupported input_type: {it!r}")

        # Insert into final payload
        payload["Questions"][qid] = qobj

    return payload

class SurveyConversionOutput(BaseModel):
    """Model to validate the output of the survey conversion agent"""
    title: str  # title string (presumably the survey title — confirm against the agent prompt)
    # One free-form dict per survey field; convert_to_question_format reads
    # "title", "type" and the optional "options" from each of them.
    fields: List[Dict[str, Any]]

def validate_conversion_output(raw_output: str) -> Dict:
    """
    Validates the output of the survey conversion agent using Pydantic.

    The text is first freed from a surrounding markdown code fence (with any
    language tag, and even when other text precedes or follows the fence),
    then parsed as JSON and validated against SurveyConversionOutput.

    Args:
        raw_output: The raw JSON string output from the agent

    Returns:
        The validated dictionary if successful

    Raises:
        ValidationError: If the output doesn't match the expected schema. The
            error raised by the model is propagated unchanged, because
            building a new ValidationError by hand is not supported by
            Pydantic v2.
        ValueError: If the output cannot be parsed as JSON or is not a JSON
            object
    """
    cleaned_output = raw_output.strip()

    # Greedy match: take everything between the first opening fence and the
    # LAST closing fence, so backticks inside the payload do not cut it short.
    fenced = re.search(r"```[A-Za-z0-9_+-]*\s*(.*)```", cleaned_output, re.DOTALL)
    if fenced:
        cleaned_output = fenced.group(1)
    elif cleaned_output.startswith("```"):
        # Opening fence without a closing one: drop the fence and its tag.
        cleaned_output = re.sub(r"^```[A-Za-z0-9_+-]*\s*", "", cleaned_output)

    try:
        parsed_dict = json.loads(cleaned_output)
    except json.JSONDecodeError as e:
        raise ValueError(f"Failed to parse JSON output: {e}\nRaw output:\n{raw_output}") from e

    # The model is built with ** unpacking, which only works for an object.
    if not isinstance(parsed_dict, dict):
        raise ValueError(
            f"Expected a JSON object but got {type(parsed_dict).__name__}\nRaw output:\n{raw_output}"
        )

    # A schema mismatch raises pydantic.ValidationError from here.
    validated = SurveyConversionOutput(**parsed_dict)

    # model_dump() is the Pydantic v2 name; dict() is the v1 name.
    if hasattr(validated, "model_dump"):
        return validated.model_dump()
    return validated.dict()

def convert_to_question_format(conversion_output: Dict) -> List[Question]:
    """
    Turn validated conversion output into Question objects.

    Questions get the ids "q1", "q2", ... in field order. A field whose type
    is neither "multiple_choice", "text_input" nor "slider" becomes a
    "single_choice" question.

    Args:
        conversion_output: The validated conversion output; its "fields"
            entries provide "title", "type" and optionally "options"

    Returns:
        List of Question objects
    """
    def _choice_config(field_spec):
        # Options are numbered from 0 and the number is stored as a string.
        numbered = [
            {"text": label, "value": str(position)}
            for position, label in enumerate(field_spec.get("options", []))
        ]
        return ChoiceConfig(options=numbered)

    converted = []
    for number, field in enumerate(conversion_output["fields"], start=1):
        title = field["title"]
        kind = field["type"]

        if kind == "text_input":
            resolved_type = "text_input"
            config = TextInputConfig(multiline=False)
        elif kind == "slider":
            resolved_type = "slider"
            config = SliderConfig(min=0, max=100, step=1)  # fixed default range
        else:
            # "multiple_choice" stays as is; every other type is treated as
            # a single-choice question.
            resolved_type = "multiple_choice" if kind == "multiple_choice" else "single_choice"
            config = _choice_config(field)

        converted.append(
            Question(
                question_id=f"q{number}",
                question_text=title,
                input_type=resolved_type,
                input_config=config,
            )
        )

    return converted

# Update the convert_task to include validation
def modified_convert_task(conv_t, convert_agent):
    """Build the convert task with async execution and a validation function.

    Args:
        conv_t: Task configuration dict with "description", "expected_output"
            and an optional "tool" entry.
        convert_agent: Agent the task is assigned to.

    Returns:
        The configured Task.
    """
    task_options = dict(
        name="convert_survey_to_json",
        description=conv_t["description"],
        agent=convert_agent,
        tool=conv_t.get("tool"),
        expected_output=conv_t["expected_output"],
        output_format=OutputFormat.JSON,
        async_execution=True,  # run asynchronously
        validation_function=validate_conversion_output,  # validates the agent output
    )
    return Task(**task_options)

class QualtricsClient:
    """Handles all Qualtrics API interactions.

    Credentials are read from the environment after loading a .env file:
    QUALTRICS_API_TOKEN, QUALTRICS_DATA_CENTER and QUALTRICS_DIRECTORY_ID.
    """

    # Seconds to wait on a single HTTP request, so a stalled connection
    # cannot hang the caller forever.
    REQUEST_TIMEOUT = 30
    # Upper bound, in seconds, on how long a response export is polled.
    EXPORT_TIMEOUT = 600
    # Pause, in seconds, between two export progress checks.
    EXPORT_POLL_INTERVAL = 2
    # Export formats get_survey_responses knows how to parse.
    SUPPORTED_EXPORT_FORMATS = ("csv", "json")

    def __init__(self):
        """Initialize Qualtrics API client with credentials from .env file

        Raises:
            ValueError: If the API token or the data center is missing.
        """
        # Print current working directory to help debug file path issues
        print(f"Current working directory: {os.getcwd()}")

        # Check if .env file exists
        if os.path.exists('.env'):
            print("Found .env file in current directory")
        else:
            print("WARNING: No .env file found in current directory!")

        # Load environment variables
        load_dotenv(verbose=True)

        self.api_token = os.getenv('QUALTRICS_API_TOKEN')
        self.data_center = os.getenv('QUALTRICS_DATA_CENTER')
        self.directory_id = os.getenv('QUALTRICS_DIRECTORY_ID')

        # Print obfuscated token for debugging (only first/last 4 chars)
        if self.api_token:
            print(f"API Token loaded (masked): {self._mask_token(self.api_token)}")
        else:
            print("WARNING: No API token found in environment variables!")

        if self.data_center:
            print(f"Data center: {self.data_center}")
        else:
            print("WARNING: No data center found in environment variables!")

        if not self.api_token or not self.data_center:
            raise ValueError("Missing Qualtrics API credentials in .env file")

        # Set up base URL for API requests
        self.base_url = f"https://{self.data_center}.qualtrics.com/API/v3/"
        self.headers = {
            "X-API-Token": self.api_token,
            "Content-Type": "application/json"
        }

        # Test connection. This is best-effort: a failure is reported but
        # never prevents the client from being constructed.
        print("Testing Qualtrics API connection...")
        try:
            test_url = f"{self.base_url}whoami"
            response = requests.get(test_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            if response.status_code == 200:
                user_info = response.json()["result"]
                print(f"Connection successful! Authenticated as: {user_info.get('firstName', '')} {user_info.get('lastName', '')}")
            else:
                print(f"Connection test failed with status code: {response.status_code}")
                print(f"Response: {response.text}")
        except Exception as e:
            print(f"Error testing connection: {str(e)}")

    @staticmethod
    def _mask_token(token):
        """Return ``token`` with all but its first and last 4 characters starred out."""
        if len(token) > 8:
            return token[:4] + '*' * (len(token) - 8) + token[-4:]
        # Too short to reveal any character safely.
        return "****"

    @staticmethod
    def _response_body(resp):
        """Return the decoded JSON body of ``resp``, or its raw text if it is not JSON."""
        try:
            return resp.json()
        except ValueError:
            return resp.text

    @staticmethod
    def _default_survey_payload(survey_name):
        """Return the built-in three-question survey used when no template is given."""
        return {
            "SurveyName": survey_name,
            "Language": "EN",
            "ProjectCategory": "CORE", # Required field
            "Questions": {
                "QID1": {
                    "QuestionText": "What is your age?",
                    "QuestionType": "MC",
                    "Selector": "SAVR", # Required selector for multiple choice
                    "SubSelector": "TX", # Text selector
                    "Configuration": {
                        "QuestionDescriptionOption": "UseText"
                    },
                    "Validation": {
                        "Settings": {
                            "ForceResponse": "OFF",
                            "Type": "None"
                        }
                    },
                    "Choices": {
                        "1": {"Display": "18-24"},
                        "2": {"Display": "25-34"},
                        "3": {"Display": "35-44"},
                        "4": {"Display": "45-54"},
                        "5": {"Display": "55-64"},
                        "6": {"Display": "65+"}
                    }
                },
                "QID2": {
                    "QuestionText": "How satisfied are you with our product?",
                    "QuestionType": "Likert",
                    "Selector": "LSL", # Likert scale
                    "SubSelector": "TX", # Text selector
                    "Configuration": {
                        "QuestionDescriptionOption": "UseText"
                    },
                    "Validation": {
                        "Settings": {
                            "ForceResponse": "OFF",
                            "Type": "None"
                        }
                    },
                    "Choices": {
                        "1": {"Display": "Very dissatisfied"},
                        "2": {"Display": "Dissatisfied"},
                        "3": {"Display": "Neutral"},
                        "4": {"Display": "Satisfied"},
                        "5": {"Display": "Very satisfied"}
                    }
                },
                "QID3": {
                    "QuestionText": "Any additional comments?",
                    "QuestionType": "TE", # Text entry
                    "Selector": "ML", # Multi-line
                    "Configuration": {
                        "QuestionDescriptionOption": "UseText"
                    },
                    "Validation": {
                        "Settings": {
                            "ForceResponse": "OFF",
                            "Type": "None"
                        }
                    }
                }
            }
        }

    def create_survey(self, survey_name, survey_template=None):
        """
        Create a new survey in Qualtrics

        Args:
            survey_name (str): Name of the survey. Used for the built-in
                template, and as "SurveyName" when a supplied template has none.
            survey_template (dict, optional): Survey template JSON. It is
                copied before defaults are filled in, so the caller's dict is
                left untouched.

        Returns:
            str: Survey ID of the created survey

        Raises:
            Exception: If Qualtrics answers with a status other than 200.
        """
        print(f"Creating survey: {survey_name}")

        if not survey_template:
            # If no template is provided, use a basic template
            survey_payload = self._default_survey_payload(survey_name)
        else:
            # Shallow copy: only top-level keys are added below.
            survey_payload = dict(survey_template)
            survey_payload.setdefault("ProjectCategory", "CORE")
            survey_payload.setdefault("SurveyName", survey_name)

        # Create survey
        url = f"{self.base_url}survey-definitions"
        payload = json.dumps(survey_payload)

        print(f"Sending payload to Qualtrics: {payload[:200]}...")

        response = requests.post(url, headers=self.headers, data=payload, timeout=self.REQUEST_TIMEOUT)

        if response.status_code != 200:
            print(f"Error response: {response.text}")
            raise Exception(f"Failed to create survey: {response.text}")

        result = response.json()
        survey_id = result["result"]["SurveyID"]
        print(f"Survey created successfully with ID: {survey_id}")

        return survey_id

    def add_questions(self, survey_id: str, questions: List[dict]):
        """Add questions to a survey, one POST per question.

        Each question dict must provide "question_id", "question_text" and
        "QuestionType"; "Selector", "SubSelector" and "Choices" are forwarded
        when present. The status and body of every response are printed.
        """
        url = f"{self.base_url}survey-definitions/{survey_id}/questions"
        for q in questions:
            # Start with the fields every question needs
            q_payload = {
                "QuestionID":   q["question_id"],
                "QuestionText": q["question_text"],
                "QuestionType": q["QuestionType"],
                "DataExportTag": q["question_id"],
                "Configuration": {"QuestionDescriptionOption": "UseText"},
                "Validation":    {"Settings": {"ForceResponse": "OFF", "Type": "None"}},
            }

            # Only add Selector/SubSelector/Choices if given
            for optional_key in ("Selector", "SubSelector", "Choices"):
                if optional_key in q:
                    q_payload[optional_key] = q[optional_key]

            resp = requests.post(url, headers=self.headers, json=q_payload, timeout=self.REQUEST_TIMEOUT)
            print(f"POST questions → {resp.status_code}", self._response_body(resp))

    def add_block(self, survey_id: str, block_payload: dict):
        """Add a block to a survey and print the response status and body."""
        url = f"{self.base_url}survey-definitions/{survey_id}/blocks"
        resp = requests.post(url, headers=self.headers, json=block_payload, timeout=self.REQUEST_TIMEOUT)
        print(f"POST blocks → {resp.status_code}", self._response_body(resp))

    def update_flow(self, survey_id: str, flow_payload: dict):
        """Update the flow of a survey and print the response status and body."""
        url = f"{self.base_url}survey-definitions/{survey_id}/flow"
        resp = requests.put(url, headers=self.headers, json=flow_payload, timeout=self.REQUEST_TIMEOUT)
        print("PUT flow →", resp.status_code, self._response_body(resp))

    def activate_survey(self, survey_id):
        """
        Activate a survey to make it available for distribution

        Args:
            survey_id (str): ID of the survey to activate

        Returns:
            bool: True if successful

        Raises:
            Exception: If Qualtrics answers with a status other than 200.
        """
        print(f"Activating survey: {survey_id}")

        url = f"{self.base_url}surveys/{survey_id}"
        payload = json.dumps({"isActive": True})

        response = requests.put(url, headers=self.headers, data=payload, timeout=self.REQUEST_TIMEOUT)

        if response.status_code != 200:
            raise Exception(f"Failed to activate survey: {response.text}")

        print("Survey activated successfully")
        return True

    def create_distribution_link(self, survey_id, link_type="Anonymous"):
        """
        Create a distribution link for a survey

        Args:
            survey_id (str): ID of the survey to distribute
            link_type (str): Type of link (Anonymous or Individual)

        Returns:
            str: Distribution link URL

        Raises:
            NotImplementedError: For every link_type other than "Anonymous".
        """
        print(f"Creating distribution link for survey: {survey_id}")

        # Only anonymous links are supported; they need no API call.
        if link_type != "Anonymous":
            raise NotImplementedError(f"Distribution type '{link_type}' is not yet supported")

        # Anonymous links follow the standard pattern
        # https://DATACENTERID.qualtrics.com/jfe/form/SURVEYID
        survey_link = f"https://{self.data_center}.qualtrics.com/jfe/form/{survey_id}"
        print(f"Anonymous survey link created: {survey_link}")
        return survey_link

    def get_survey_responses(self, survey_id, file_format="csv"):
        """
        Download survey responses

        Args:
            survey_id (str): ID of the survey
            file_format (str): Format of the response file; "csv" and "json"
                are supported

        Returns:
            pandas.DataFrame: Survey responses as a DataFrame

        Raises:
            ValueError: If file_format is not supported. This is checked
                before any request is sent.
            TimeoutError: If the export is not complete within EXPORT_TIMEOUT
                seconds.
            Exception: If a request fails, the export reports "failed", or
                the downloaded archive has no file of the requested format.
        """
        if file_format not in self.SUPPORTED_EXPORT_FORMATS:
            raise ValueError(f"Unsupported file format: {file_format}")

        print(f"Downloading responses for survey: {survey_id}")

        # Step 1: Create the export
        export_url = f"{self.base_url}surveys/{survey_id}/export-responses"
        export_payload = json.dumps({
            "format": file_format,
            "useLabels": True
        })

        export_response = requests.post(
            export_url, headers=self.headers, data=export_payload, timeout=self.REQUEST_TIMEOUT
        )

        if export_response.status_code != 200:
            raise Exception(f"Failed to initiate export: {export_response.text}")

        progress_id = export_response.json()["result"]["progressId"]

        # Step 2: Poll until the export is complete. A "failed" status and an
        # overall deadline both end the loop, so it cannot spin forever.
        progress_url = f"{self.base_url}surveys/{survey_id}/export-responses/{progress_id}"
        deadline = time.monotonic() + self.EXPORT_TIMEOUT
        while True:
            progress_response = requests.get(
                progress_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
            )

            if progress_response.status_code != 200:
                raise Exception(f"Failed to check export progress: {progress_response.text}")

            progress_result = progress_response.json()["result"]
            progress_status = progress_result["status"]
            progress = progress_result.get("percentComplete", 0)

            print(f"Export progress: {progress}%")

            if progress_status == "complete":
                break
            if progress_status == "failed":
                raise Exception(f"Export failed: {progress_response.text}")
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Export not complete after {self.EXPORT_TIMEOUT} seconds (status: {progress_status})"
                )
            time.sleep(self.EXPORT_POLL_INTERVAL)

        # Step 3: Download the file
        file_id = progress_result["fileId"]
        download_url = f"{self.base_url}surveys/{survey_id}/export-responses/{file_id}/file"
        download_response = requests.get(download_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)

        if download_response.status_code != 200:
            raise Exception(f"Failed to download responses: {download_response.text}")

        # Step 4: Extract and parse the zip file
        with zipfile.ZipFile(io.BytesIO(download_response.content)) as zip_file:
            matching = [f for f in zip_file.namelist() if f.endswith(f".{file_format}")]
            if not matching:
                raise Exception(f"Export archive contains no .{file_format} file")
            with zip_file.open(matching[0]) as file:
                # NOTE(review): Qualtrics CSV exports may carry extra header
                # rows that end up as data rows here — confirm and skip them
                # if callers expect one row per response.
                if file_format == "csv":
                    df = pd.read_csv(file)
                else:
                    df = pd.read_json(file)

        print(f"Successfully downloaded {len(df)} responses")
        return df

class MTurkClient:
    """Handles all MTurk API interactions.

    If the boto3 client cannot be created, ``self.client`` is None and
    ``self.connection_error`` holds the reason.
    """

    # Both endpoints below live in us-east-1, so requests are signed for that
    # region regardless of AWS_REGION; a different signing region would not
    # match the endpoint.
    MTURK_REGION = 'us-east-1'
    SANDBOX_ENDPOINT = 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'
    PRODUCTION_ENDPOINT = 'https://mturk-requester.us-east-1.amazonaws.com'

    def __init__(self,
                aws_access_key_id: str = None,
                aws_secret_access_key: str = None,
                use_sandbox: bool = True):  # Default to sandbox mode for safety
        """
        Initialize MTurk client

        Args:
            aws_access_key_id: Optional override for AWS access key
            aws_secret_access_key: Optional override for AWS secret key
            use_sandbox: Boolean for using sandbox (defaults to True for
                safety). Pass None to read MTURK_SANDBOX from the environment.

        Raises:
            ValueError: If no AWS credentials are available.
        """
        # Load from .env file
        load_dotenv()

        # Set AWS credentials (with optional overrides)
        self.aws_access_key_id = aws_access_key_id or os.getenv('AWS_ACCESS_KEY_ID')
        self.aws_secret_access_key = aws_secret_access_key or os.getenv('AWS_SECRET_ACCESS_KEY')

        # Check if credentials are available
        if not self.aws_access_key_id or not self.aws_secret_access_key:
            raise ValueError("Missing AWS credentials in .env file or constructor parameters")

        # Determine sandbox mode (with optional override)
        if use_sandbox is None:
            # Read from environment if not provided in constructor
            self.use_sandbox = os.getenv('MTURK_SANDBOX', 'True').lower() == 'true'
        else:
            self.use_sandbox = use_sandbox

        endpoint = self.SANDBOX_ENDPOINT if self.use_sandbox else self.PRODUCTION_ENDPOINT

        # Always defined, so later checks never hit a missing attribute.
        self.connection_error = None

        # Create boto3 client
        try:
            self.client = boto3.client(
                'mturk',
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_secret_access_key,
                region_name=self.MTURK_REGION,
                endpoint_url=endpoint
            )
            print(f"MTurk client initialized in {'Sandbox' if self.use_sandbox else 'Production'} mode")

            # Verify connection by checking account balance
            self.get_account_balance()

        except Exception as e:
            print(f"Error initializing MTurk client: {str(e)}")
            print("Please verify your AWS credentials and MTurk account configuration.")
            print("For MTurk integration, you need:")
            print("1. Valid AWS credentials in your .env file")
            print("2. Your AWS account linked to your MTurk Requester account")
            print("3. Proper permissions for the MTurk API")

            # Keep the object usable; API methods report the missing client.
            self.client = None
            self.connection_error = str(e)

    def _require_client(self):
        """Return the boto3 client, or raise RuntimeError if it was never created."""
        client = getattr(self, "client", None)
        if client is None:
            reason = getattr(self, "connection_error", None) or "client was not initialized"
            raise RuntimeError(f"MTurk client is not available: {reason}")
        return client

    def get_account_balance(self):
        """Get the available MTurk account balance

        Returns:
            float: The balance, or 0.0 if there is no client or the call fails.
        """
        if not self.client:
            print(f"Cannot check balance: {self.connection_error}")
            return 0.0

        try:
            response = self.client.get_account_balance()
            balance = response['AvailableBalance']
            print(f"MTurk account balance: ${balance}")
            return float(balance)
        except Exception as e:
            print(f"Error checking balance: {str(e)}")
            return 0.0

    def create_hit_with_survey_link(self, survey_link, hit_config=None):
        """
        Create an MTurk HIT with a link to a Qualtrics survey

        Args:
            survey_link (str): URL to the Qualtrics survey
            hit_config (dict, optional): Custom configuration for the HIT.
                Keys that are left out fall back to the defaults below.

        Returns:
            str: HIT ID

        Raises:
            RuntimeError: If the MTurk client could not be initialized.
        """
        import html  # local import: only needed to escape the link

        client = self._require_client()
        print("Creating MTurk HIT with survey link")

        # Default HIT configuration
        defaults = {
            'Title': 'Complete a short survey',
            'Description': 'We need your input for a quick survey that should take less than 10 minutes',
            'Keywords': 'survey, research, opinion, feedback',
            'Reward': '0.50',
            'MaxAssignments': 100,
            'LifetimeInSeconds': 86400,  # 1 day
            'AssignmentDurationInSeconds': 1800,  # 30 minutes
            'AutoApprovalDelayInSeconds': 86400,  # 1 day
            'QualificationRequirements': []
        }
        # Caller-supplied keys win; a partial config no longer raises KeyError.
        hit_config = {**defaults, **(hit_config or {})}

        # Escape the link for use inside an attribute and as text. This also
        # keeps a "]]>" in the link from ending the CDATA section early.
        safe_link = html.escape(survey_link, quote=True)

        # Create the HTML question with the survey link
        question_html = f"""
        <HTMLQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2011-11-11/HTMLQuestion.xsd">
            <HTMLContent><![CDATA[
                <!DOCTYPE html>
                <html>
                <head>
                    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
                    <script type='text/javascript' src='https://s3.amazonaws.com/mturk-public/externalHIT_v1.js'></script>
                </head>
                <body>
                    <form name='mturk_form' method='post' id='mturk_form' action='https://www.mturk.com/mturk/externalSubmit'>
                        <input type='hidden' value='' name='assignmentId' id='assignmentId'/>
                        <h1>Survey Task</h1>
                        <p>Please complete the survey at the following link:</p>
                        <p><a href='{safe_link}' target='_blank'>{safe_link}</a></p>
                        <p>After completing the survey, you will receive a completion code. Enter the code below:</p>
                        <p><input type='text' name='completion_code' id='completion_code' size='40'/></p>
                        <p><input type='submit' id='submitButton' value='Submit' /></p>
                    </form>
                    <script language='Javascript'>
                        turkSetAssignmentID();
                    </script>
                </body>
                </html>
            ]]></HTMLContent>
            <FrameHeight>600</FrameHeight>
        </HTMLQuestion>
        """

        # Create the HIT
        response = client.create_hit(
            Title=hit_config['Title'],
            Description=hit_config['Description'],
            Keywords=hit_config['Keywords'],
            Reward=hit_config['Reward'],
            MaxAssignments=hit_config['MaxAssignments'],
            LifetimeInSeconds=hit_config['LifetimeInSeconds'],
            AssignmentDurationInSeconds=hit_config['AssignmentDurationInSeconds'],
            AutoApprovalDelayInSeconds=hit_config['AutoApprovalDelayInSeconds'],
            Question=question_html,
            QualificationRequirements=hit_config['QualificationRequirements']
        )

        hit_id = response['HIT']['HITId']
        hit_type_id = response['HIT']['HITTypeId']

        print(f"HIT created successfully with ID: {hit_id}")

        # Print the HIT URL
        if self.use_sandbox:
            worker_url = f"https://workersandbox.mturk.com/mturk/preview?groupId={hit_type_id}"
        else:
            worker_url = f"https://worker.mturk.com/mturk/preview?groupId={hit_type_id}"

        print(f"Workers can access the HIT at: {worker_url}")

        return hit_id

    def get_hit_assignments(self, hit_id):
        """
        Get all assignments for a HIT

        Args:
            hit_id (str): ID of the HIT

        Returns:
            list: List of assignment dictionaries

        Raises:
            RuntimeError: If the MTurk client could not be initialized.
        """
        client = self._require_client()
        print(f"Getting assignments for HIT: {hit_id}")

        all_assignments = []

        # Page through the results; NextToken is added once a page returns one.
        request = {'HITId': hit_id, 'MaxResults': 100}
        while True:
            response = client.list_assignments_for_hit(**request)
            all_assignments.extend(response['Assignments'])

            next_token = response.get('NextToken')
            if not next_token:
                break
            request['NextToken'] = next_token

        print(f"Found {len(all_assignments)} assignments")
        return all_assignments

    def approve_assignments(self, assignments, feedback=None):
        """
        Approve multiple assignments

        A failure on one assignment is printed and does not stop the others.

        Args:
            assignments (list): List of assignment dictionaries or IDs
            feedback (str, optional): Feedback to workers

        Returns:
            int: Number of successfully approved assignments
        """
        approved_count = 0
        requester_feedback = feedback if feedback else "Thank you for your participation!"

        for assignment in assignments:
            # Extract assignment ID if a dictionary was provided
            assignment_id = assignment['AssignmentId'] if isinstance(assignment, dict) else assignment

            try:
                self._require_client().approve_assignment(
                    AssignmentId=assignment_id,
                    RequesterFeedback=requester_feedback
                )
                approved_count += 1
            except Exception as e:
                print(f"Error approving assignment {assignment_id}: {str(e)}")

        print(f"Successfully approved {approved_count} assignments")
        return approved_count

    def delete_hit(self, hit_id):
        """
        Delete a HIT

        A 'Reviewable' HIT is deleted directly; an 'Assignable' HIT is expired
        first. Any other status, and any error, is printed and yields False.

        Args:
            hit_id (str): ID of the HIT to delete

        Returns:
            bool: True if successful
        """
        try:
            client = self._require_client()

            # Get the HIT status
            hit = client.get_hit(HITId=hit_id)
            status = hit['HIT']['HITStatus']

            # If the HIT is reviewable, dispose of it
            if status == 'Reviewable':
                client.delete_hit(HITId=hit_id)
                print(f"HIT {hit_id} deleted successfully")
                return True

            # If the HIT is assignable, expire it first then delete it
            if status == 'Assignable':
                client.update_expiration_for_hit(
                    HITId=hit_id,
                    ExpireAt=datetime(2015, 1, 1)  # Set to a past date to expire immediately
                )
                time.sleep(1)  # Give time for the HIT to update
                client.delete_hit(HITId=hit_id)
                print(f"HIT {hit_id} expired and deleted successfully")
                return True

            print(f"Cannot delete HIT {hit_id}, status is {status}")
            return False

        except Exception as e:
            print(f"Error deleting HIT {hit_id}: {str(e)}")
            return False

# Add the missing QualtricsAndMTurkAutomation class
class QualtricsAndMTurkAutomation:
    """Glue between the Qualtrics client and the MTurk client.

    One instance owns one client of each kind and offers two operations:
    publishing a survey as a HIT, and later pulling the results back.
    """

    def __init__(self):
        """Create the Qualtrics client, then the MTurk client."""
        self.qualtrics_client = QualtricsClient()
        self.mturk_client = MTurkClient()

    def run(self, qualtrics_payload, hit_config=None):
        """
        Publish a survey on Qualtrics and advertise it through an MTurk HIT.

        Args:
            qualtrics_payload (dict): Survey definition for Qualtrics; its
                "SurveyName" entry names the survey ("New Survey" if absent)
            hit_config (dict, optional): Configuration forwarded to the
                MTurk client when the HIT is created

        Returns:
            dict: "survey_id", "survey_link" and "hit_id" of what was created
        """
        print("Starting Qualtrics and MTurk automation...")

        qualtrics = self.qualtrics_client

        # Survey must exist and be active before a link is requested
        title = qualtrics_payload.get("SurveyName", "New Survey")
        new_survey_id = qualtrics.create_survey(title, qualtrics_payload)
        qualtrics.activate_survey(new_survey_id)
        public_link = qualtrics.create_distribution_link(new_survey_id)

        # The HIT is created last, pointing workers at the survey link
        return {
            "survey_id": new_survey_id,
            "survey_link": public_link,
            "hit_id": self.mturk_client.create_hit_with_survey_link(public_link, hit_config),
        }

    def collect_and_process_results(self, survey_id, hit_id, auto_approve=True):
        """
        Download survey responses, save them as CSV and fetch HIT assignments.

        Args:
            survey_id (str): Qualtrics survey ID
            hit_id (str): MTurk HIT ID
            auto_approve (bool): Approve every fetched assignment when True

        Returns:
            dict: "responses", "csv_filename", "assignments" and
                "approved_count" (0 when nothing was approved)
        """
        response_table = self.qualtrics_client.get_survey_responses(survey_id)

        # File name carries a to-the-second timestamp of this call
        out_path = f"survey_responses_{datetime.now():%Y%m%d_%H%M%S}.csv"
        response_table.to_csv(out_path, index=False)
        print(f"Saved {len(response_table)} responses to {out_path}")

        hit_assignments = self.mturk_client.get_hit_assignments(hit_id)

        # Approval is skipped when disabled or when there is nothing to approve
        n_approved = (
            self.mturk_client.approve_assignments(hit_assignments)
            if auto_approve and hit_assignments
            else 0
        )

        return {
            "responses": response_table,
            "csv_filename": out_path,
            "assignments": hit_assignments,
            "approved_count": n_approved,
        }

# crewAI's Flow base class, used by SurveyFlow below
from crewai import Flow

class SurveyFlow(Flow):
    """Flow for processing a survey from raw text to a reviewed, revised survey.

    ``kickoff_async`` runs a four-task crew (convert, research, comment,
    improve) on the survey text, parses the crew's final output as JSON,
    prints a human-readable summary and returns the parsed structure.
    """

    def __init__(self):
        """Initialize the flow with an empty custom state dictionary."""
        super().__init__()
        self._custom_state = {}  # Use a custom state dictionary instead
        self.verbose = True

    @staticmethod
    def _extract_topic(survey_text):
        """Return the topic named on the first line of a non-empty survey text.

        The 'Topic:' label is removed and everything from a 'Questions:'
        label onwards is ignored, so a survey typed on a single line
        ("Topic: X  Questions: 1. ...") still yields just "X".
        """
        first_line = survey_text.splitlines()[0]
        topic_part = first_line.split('Questions:', 1)[0]
        return topic_part.replace('Topic:', '').strip()

    @staticmethod
    def _strip_code_fence(raw):
        """Return *raw* without a surrounding Markdown code fence, if any."""
        text = raw.strip()
        if not text.startswith("```"):
            return text

        opening, newline, body = text.partition("\n")
        if not newline:
            # Whole payload sits on one line: drop the backticks and an
            # optional "json" tag rather than indexing a missing second line.
            body = re.sub(r"^```(?:json)?", "", opening, flags=re.IGNORECASE)
        return body.rsplit("```", 1)[0].strip()

    @staticmethod
    def _print_original_with_comments(annotated):
        """Print the original survey, pairing each question with its comment."""
        print("\n=== Original Survey (with comments) ===")
        survey = annotated.get('survey', {})

        print(f"Theme: {survey.get('theme', 'N/A')}")
        print(f"Purpose: {survey.get('purpose', 'N/A')}\n")

        comments = annotated.get('question_comments', [])
        for q in survey.get('questions', []):
            if not isinstance(q, dict):
                continue
            qid = q.get('question_id', 'unknown')
            print(f"Question {qid}: {q.get('question_text', 'N/A')}")
            # First comment whose question_id matches this question, if any
            comment = next(
                (c.get('comment', '') for c in comments if c.get('question_id') == qid),
                None,
            )
            if comment:
                print(f"  Comment: {comment}")
            print()

        overall = annotated.get('overall_comment')
        if overall:
            print(f"Overall comment: {overall}\n")

    @staticmethod
    def _print_revised_survey(revised):
        """Print the revised survey with the options of each question."""
        print("=== Revised Survey ===")

        print(f"Theme:   {revised.get('theme', 'N/A')}")
        print(f"Purpose: {revised.get('purpose', 'N/A')}\n")

        for q in revised.get('questions', []):
            if not isinstance(q, dict):
                continue
            qid = q.get('question_id', 'unknown')
            print(f"Q{qid}: {q.get('question_text', 'N/A')}")

            # Only configs that carry an 'options' list have anything to show
            input_config = q.get('input_config', {})
            if isinstance(input_config, dict):
                opts = input_config.get('options', [])
                if opts:
                    print("  Options:")
                    for o in opts:
                        print(f"    - {o}")
            print()

    def _print_survey_results(self, survey_dict):
        """Print a readable summary of *survey_dict*; never raises.

        Falls back to dumping the parsed data as JSON when the expected
        keys are absent or the structure cannot be walked.
        """
        print("\n===== Survey Results =====")
        try:
            if 'original_with_comments' in survey_dict:
                self._print_original_with_comments(
                    survey_dict.get('original_with_comments', {})
                )

            if 'revised_survey' in survey_dict:
                self._print_revised_survey(survey_dict.get('revised_survey', {}))

            # If standard format is not found, print raw structure
            if 'original_with_comments' not in survey_dict and 'revised_survey' not in survey_dict:
                print("Survey output doesn't match expected structure. Raw output:")
                print(json.dumps(survey_dict, indent=2))

        except Exception as e:
            # Display is best-effort; a malformed structure must not abort the flow
            print(f"Error parsing survey structure: {str(e)}")
            print("Raw survey data:")
            print(json.dumps(survey_dict, indent=2))

    async def kickoff_async(self, inputs):
        """Process the survey text with the crew and return the parsed result.

        Args:
            inputs (dict): Must contain 'survey_text', the raw survey whose
                first line names the topic (optionally prefixed 'Topic:').

        Returns:
            The JSON-decoded crew output (also stored under 'survey_dict'
            in the flow's custom state).

        Raises:
            ValueError: If the survey text is missing or empty, or if the
                crew output is not valid JSON.
        """
        # Store inputs in our custom state
        self._custom_state.update(inputs)

        survey_text = (self._custom_state.get('survey_text') or '').strip()
        if not survey_text:
            raise ValueError("No survey text provided: 'survey_text' is missing or empty")
        topic = self._extract_topic(survey_text)
        current_year = datetime.now().year

        # Initialize agents and tasks
        convert_agent, editor_agent = load_agents()
        convert_task, research_task, comment_task, improve_task = load_tasks(convert_agent, editor_agent)

        task_inputs = {
            'survey_text': survey_text,
            'topic': topic,
            'current_year': current_year
        }

        # Reference documents made available to the crew as knowledge
        content_source = CrewDoclingSource(
            file_paths=[
                "diamantopoulos-winklhofer-2001-index-construction-with-formative-indicators-an-alternative-to-scale-development.pdf",
                "Marketing_survey_research_best_practice_2018.pdf",
            ],
        )

        survey_crew = Crew(
            agents=[convert_agent, editor_agent],
            tasks=[convert_task, research_task, comment_task, improve_task],
            process=Process.sequential,
            knowledge_sources=[content_source],
            verbose=True
        )

        # NOTE: kickoff() is called synchronously, so this coroutine does not
        # yield to the event loop while the crew is running.
        crew_result = survey_crew.kickoff(
            inputs=task_inputs
        )

        # The model may wrap its JSON in a Markdown code fence
        raw = self._strip_code_fence(crew_result.raw)
        try:
            survey_dict = json.loads(raw)
        except json.JSONDecodeError as e:
            raise ValueError(f"JSON parsing failed: {e}\nRaw output:\n{raw}") from e

        self._print_survey_results(survey_dict)

        # Store the survey dict in our custom state
        self._custom_state['survey_dict'] = survey_dict
        return survey_dict

def _build_hit_config(survey_dict):
    """Return the MTurk HIT settings for the revised survey in *survey_dict*.

    The title is derived from the revised survey's theme so that it
    describes the survey actually being deployed; the description is the
    revised survey's purpose.

    Raises:
        KeyError: If 'revised_survey' or its 'purpose' entry is missing.
    """
    revised_survey = survey_dict["revised_survey"]
    theme = str(revised_survey.get("theme") or "").strip()
    title = f"Complete a short survey on {theme}" if theme else "Complete a short survey"
    return {
        'Title': title,
        'Description': revised_survey["purpose"],
        'Keywords': 'survey, research, feedback',
        'Reward': '0.75',
        'MaxAssignments': 100,
        'LifetimeInSeconds': 86400,  # 24 hours
        'AssignmentDurationInSeconds': 1800,  # 30 minutes
        'AutoApprovalDelayInSeconds': 86400,  # 24 hours
        'QualificationRequirements': []
    }


def main():
    """Run the survey processing and deployment flow end to end.

    Prompts for the survey text, runs SurveyFlow on it, converts the result
    to a Qualtrics payload, then creates the Qualtrics survey and MTurk HIT.

    Returns:
        dict: 'survey_id', 'survey_link' and 'hit_id' of the deployment.
    """
    # Load environment variables
    load_dotenv()

    # Display input requirements
    print("========================================")
    print("⚠️  INPUT REQUIREMENTS:")
    print("- You must include a line starting with 'Topic:'")
    print("- You must include at least one line starting with 'Questions:'")
    print("Otherwise, the survey cannot be processed.")
    print("========================================")

    # Get survey input
    survey_to_process = input("Please enter the Survey content: ")

    flow = SurveyFlow()

    # Execute the flow using nest_asyncio to handle Jupyter's event loop
    import nest_asyncio
    nest_asyncio.apply()

    # Now we can use asyncio.run inside Jupyter
    survey_dict = asyncio.run(flow.kickoff_async(inputs={
        'survey_text': survey_to_process
    }))

    # Convert the survey to Qualtrics format
    qualtrics_payload = survey_dict_to_qualtrics_payload(survey_dict)

    # HIT title/description follow the processed survey, not a fixed topic
    hit_config = _build_hit_config(survey_dict)

    # Run the automation to create the survey and HIT
    automation = QualtricsAndMTurkAutomation()
    results = automation.run(qualtrics_payload, hit_config)

    print("\nResults:")
    print(f"Survey ID: {results['survey_id']}")
    print(f"Survey Link: {results['survey_link']}")
    print(f"HIT ID: {results['hit_id']}")

    return results

# Entry point: start the full survey pipeline when this module is the program
# being run, not when it is merely imported.
if __name__ == "__main__":
    main()

⚠️  INPUT REQUIREMENTS:
- You must include a line starting with 'Topic:'
- You must include at least one line starting with 'Questions:'
Otherwise, the survey cannot be processed.


Downloading detection model, please wait. This may take several minutes depending upon your network connection.
Downloading recognition model, please wait. This may take several minutes depending upon your network connection.


[93m 


[1m[95m# Agent:[00m [1m[92mSurvey Content Conversion Agent[00m
[95m## Task:[00m [92mConvert the following survey (provided as raw text) into a structured JSON schema suitable for creating a survey in Qualtrics or similar platforms:
Topic: Social Media Usage  Questions: 1. How many hours per day do you spend on social media? 2. What platforms do you use the most? (e.g., Instagram, Twitter, TikTok) 3. Do you feel social media has a positive or negative impact on your mental health? 4. Why do you use social media? (Open text)
Make sure your output includes: - A top-level "title" field for the survey title - A "fields" array, where each element has:
  • "title": the question text  
  • "type": the question type (e.g. "multiple_choice", "text_input")  
  • "options": a list of answer options (if applicable; omit or set to [] otherwise)
[00m




LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.



ERROR:root:LiteLLM call failed: litellm.AuthenticationError: AuthenticationError: OpenAIException - The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable


Error during LLM call: litellm.AuthenticationError: AuthenticationError: OpenAIException - The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable
An unknown error occurred. Please check the details below.
Error details: litellm.AuthenticationError: AuthenticationError: OpenAIException - The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable


AuthenticationError: litellm.AuthenticationError: AuthenticationError: OpenAIException - The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [None]:
# This cell collects data from a completed survey
def collect_survey_data():
    """Prompt for a survey ID and a HIT ID, then collect and show the results.

    Assignments are auto-approved as part of the collection.

    Returns:
        dict: Whatever QualtricsAndMTurkAutomation.collect_and_process_results
            returned for the given IDs.

    Raises:
        ValueError: If either ID is entered empty.
    """
    # Pasted IDs often carry stray whitespace; strip it before calling the APIs
    survey_id = input("Enter your Qualtrics Survey ID: ").strip()
    hit_id = input("Enter your MTurk HIT ID: ").strip()
    if not survey_id or not hit_id:
        raise ValueError("Both a Qualtrics Survey ID and an MTurk HIT ID are required")

    print(f"Ready to collect data for Survey ID: {survey_id} and HIT ID: {hit_id}")

    # Create automation instance
    automation = QualtricsAndMTurkAutomation()

    # Collect and process results
    collected_data = automation.collect_and_process_results(
        survey_id=survey_id,
        hit_id=hit_id,
        auto_approve=True
    )

    print("Data collection completed. Summary:")
    print(collected_data)

    # Test for actual rows: a mere key check cannot detect an empty result
    responses = collected_data.get('responses')
    if responses is not None and len(responses) > 0:
        try:
            display(responses)
        except NameError:
            # `display` is only predefined under IPython/Jupyter
            print(responses)
    else:
        print("No responses collected.")

    return collected_data

# Runs immediately whenever this cell/module executes (there is no
# `__main__` guard here) and binds the result to `collected_data`.
collected_data = collect_survey_data()

Ready to collect data for Survey ID: SV_ehDc3d6TiGQiefY and HIT ID: 3FJ2RVH26P35OQIGA3KFR9ISICK296
Current working directory: /Users/princess/Documents/RA/Field-Experiment-AI-Agent
Found .env file in current directory
API Token loaded (masked): yNZ9********************************eAPM
Data center: yul1
Testing Qualtrics API connection...
Connection successful! Authenticated as: Sichen Zhong
MTurk client initialized in Sandbox mode
MTurk account balance: $10000.00
Downloading responses for survey: SV_ehDc3d6TiGQiefY
Export progress: 0.0%
Export progress: 100.0%
Successfully downloaded 2 responses
Saved 2 responses to survey_responses_20250513_135954.csv
Getting assignments for HIT: 3FJ2RVH26P35OQIGA3KFR9ISICK296
Found 0 assignments
Data collection completed. Summary:
{'responses':                                  StartDate  \
0                               Start Date   
1  {"ImportId":"startDate","timeZone":"Z"}   

                                 EndDate                 Status  \
0  

  datetime_now = datetime.datetime.utcnow()


Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,RecipientLastName,RecipientFirstName,RecipientEmail,ExternalReference,LocationLatitude,LocationLongitude,DistributionChannel,UserLanguage
0,Start Date,End Date,Response Type,IP Address,Progress,Duration (in seconds),Finished,Recorded Date,Response ID,Recipient Last Name,Recipient First Name,Recipient Email,External Data Reference,Location Latitude,Location Longitude,Distribution Channel,User Language
1,"{""ImportId"":""startDate"",""timeZone"":""Z""}","{""ImportId"":""endDate"",""timeZone"":""Z""}","{""ImportId"":""status""}","{""ImportId"":""ipAddress""}","{""ImportId"":""progress""}","{""ImportId"":""duration""}","{""ImportId"":""finished""}","{""ImportId"":""recordedDate"",""timeZone"":""Z""}","{""ImportId"":""_recordId""}","{""ImportId"":""recipientLastName""}","{""ImportId"":""recipientFirstName""}","{""ImportId"":""recipientEmail""}","{""ImportId"":""externalDataReference""}","{""ImportId"":""locationLatitude""}","{""ImportId"":""locationLongitude""}","{""ImportId"":""distributionChannel""}","{""ImportId"":""userLanguage""}"
