In [None]:
import os
import yaml
import json
import warnings
from datetime import datetime
from typing import List, Union, Literal, Optional

from pydantic import BaseModel
from crewai import Agent, Task, Crew, Flow, Process
from crewai.flow.flow import start
from crewai.tasks.task_output import OutputFormat

import requests
import zipfile
import io
import time
import pandas as pd
import boto3
from dotenv import load_dotenv
import logging
# Only ERROR and above is let through from the "opentelemetry" and "urllib3" loggers.
logging.getLogger("opentelemetry").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
# NOTE(review): presumably meant to switch off OpenTelemetry trace export; this runs
# after `crewai` has already been imported above — confirm it still takes effect.
os.environ["OTEL_TRACES_EXPORTER"] = "none"

# ========== Pydantic Models ==========
class ChoiceOption(BaseModel):
    # One option of a choice-type question; both fields are required strings.
    # Presumably `text` is the label shown and `value` the stored code — verify.
    text: str
    value: str

class ChoiceConfig(BaseModel):
    # Configuration variant holding the list of ChoiceOption entries.
    options: List[ChoiceOption]

class SliderConfig(BaseModel):
    # Configuration variant with three required numeric fields.
    # Presumably the slider's lower bound, upper bound and increment — verify.
    min: float
    max: float
    step: float

class TextInputConfig(BaseModel):
    # Configuration variant whose fields both have defaults, so an empty
    # object is a valid instance.
    placeholder: Optional[str] = None
    multiline: bool = False

class Question(BaseModel):
    # One survey question.
    question_id: str
    question_text: str
    # Restricted to exactly these four literal strings.
    input_type: Literal["multiple_choice", "single_choice", "slider", "text_input"]
    # One of the three configuration variants. No validator visible here ties the
    # variant to `input_type` — presumably producers keep them consistent; verify.
    input_config: Union[ChoiceConfig, SliderConfig, TextInputConfig]

class Survey(BaseModel):
    # A survey: two required strings plus a list of Question entries.
    theme: str
    purpose: str
    questions: List[Question]

class QuestionComment(BaseModel):
    # A comment attached to a question through its `question_id` string.
    question_id: str
    comment: str

class AnnotatedSurvey(BaseModel):
    # A Survey together with per-question comments and an overall comment.
    survey: Survey
    question_comments: List[QuestionComment]
    # NOTE(review): Optional with no default — whether this key may be omitted
    # (rather than just set to null) depends on the pydantic major version; confirm.
    overall_comment: Optional[str]

class SurveyImprovementResult(BaseModel):
    # Pairs the annotated original survey with the revised survey.
    # In the visible part of this file it is referenced only from commented-out
    # validation code inside SurveyFlow.initial_run.
    original_with_comments: AnnotatedSurvey
    revised_survey: Survey

# ========== Utility to load YAML ==========
def load_yaml(path: str) -> dict:
    """Open the UTF-8 text file at *path* and return the result of ``yaml.safe_load``."""
    with open(path, mode='r', encoding='utf-8') as handle:
        parsed = yaml.safe_load(handle)
    return parsed

# ========== Agent Definitions ==========
# Everything below runs at import time: each YAML file is read from a path
# relative to the current working directory, and a missing file or missing key
# raises immediately.
# Every Agent argument except `name` comes from the YAML section of the same name.
conv_cfg = load_yaml("config/agents/survey_convert_agent.yaml")["survey_convert_agent"]
convert_agent = Agent(
    name="survey_convert_agent",
    role=conv_cfg["role"],
    goal=conv_cfg["goal"],
    backstory=conv_cfg["backstory"],
    verbose=conv_cfg["verbose"],
    allow_delegation=conv_cfg["allow_delegation"]
)

edit_cfg = load_yaml("config/agents/survey_editor.yaml")["survey_editor"]
editor_agent = Agent(
    name="survey_editor_agent",
    role=edit_cfg["role"],
    goal=edit_cfg["goal"],
    backstory=edit_cfg["backstory"],
    verbose=edit_cfg["verbose"],
    allow_delegation=edit_cfg["allow_delegation"]
)

# ========== Task Definitions ==========
# For each task, `inputs`/`outputs` are the key names of the optional YAML
# "inputs"/"outputs" mappings (empty list when the mapping is absent).
# NOTE(review): confirm the installed crewai `Task` accepts the `tool`, `inputs`,
# `outputs` and `output_format` keyword arguments used here.
conv_t = load_yaml("config/tasks/convert_survey_to_json.yaml")["convert_survey_to_json"]
convert_task = Task(
    name="convert_survey_to_json",
    description=conv_t["description"],
    agent=convert_agent,
    tool=conv_t.get("tool"),
    inputs=list(conv_t.get("inputs", {}).keys()),
    outputs=list(conv_t.get("outputs", {}).keys()),
    expected_output=conv_t["expected_output"],
    output_format=OutputFormat.JSON
)

# NOTE(review): "research_task" is read from apply_survey_enhancements.yaml (the
# same file as "improve_survey" below) and is assigned to convert_agent —
# confirm both choices are intended.
res_t = load_yaml("config/tasks/apply_survey_enhancements.yaml")["research_task"]
research_task = Task(
    name="research_task",
    description=res_t["description"],
    agent=convert_agent,
    inputs=list(res_t.get("inputs", {}).keys()),
    outputs=list(res_t.get("outputs", {}).keys()),
    expected_output=res_t["expected_output"],
    output_format=OutputFormat.JSON
)

imp_t = load_yaml("config/tasks/apply_survey_enhancements.yaml")["improve_survey"]
improve_task = Task(
    name="improve_survey",
    description=imp_t["description"],
    agent=editor_agent,
    inputs=list(imp_t.get("inputs", {}).keys()),
    outputs=list(imp_t.get("outputs", {}).keys()),
    expected_output=imp_t["expected_output"],
    output_format=OutputFormat.JSON
)

# ========== Crew Definition ==========
# The three tasks are listed in the order convert -> research -> improve and the
# crew is configured with Process.sequential.
survey_crew = Crew(
    agents=[convert_agent, editor_agent],
    tasks=[convert_task, research_task, improve_task],
    process=Process.sequential,
    verbose=True
)


class SurveyFlow(Flow):
    """Flow with a single start step that runs ``survey_crew`` and prints its result."""

    @start()
    def initial_run(self):
        """Run the crew on ``self.state['survey_text']`` and return its parsed JSON.

        The first line of the survey text, with any ``Topic:`` marker removed,
        is passed to the crew as ``topic`` together with the full text and the
        current year. The crew's raw output is expected to be a JSON object,
        optionally wrapped in a Markdown code fence.

        NOTE: the result is not validated against ``SurveyImprovementResult``;
        the printing code below only assumes the key names it reads.

        Returns:
            dict: the parsed crew output, also stored in
            ``self.state['survey_dict']``.

        Raises:
            ValueError: if the survey text is empty, or the crew output is not
                valid JSON or not a JSON object.
        """
        survey_text = self.state['survey_text'].strip()
        if not survey_text:
            # Without this guard splitlines()[0] fails with a bare IndexError.
            raise ValueError("survey_text is empty")
        first_line = survey_text.splitlines()[0]
        topic = first_line.replace('Topic:', '').strip()
        current_year = datetime.now().year

        crew_result = survey_crew.kickoff(
            inputs={
                'survey_text': survey_text,
                'topic': topic,
                'current_year': current_year
            }
        )

        # Strip a Markdown fence exactly once: drop the opening fence line
        # (``` or ```json) and everything from the last closing fence onwards.
        # Trailing text after the closing fence is tolerated.
        raw = crew_result.raw.strip()
        if raw.startswith("```"):
            _, _, fenced_body = raw.partition("\n")
            raw = fenced_body.rsplit("```", 1)[0].strip()

        try:
            survey_dict = json.loads(raw)
        except json.JSONDecodeError as e:
            # The message text means "failed to parse JSON"; kept verbatim.
            raise ValueError(f"解析 JSON 失败: {e}\nRaw output:\n{raw}") from e
        if not isinstance(survey_dict, dict):
            raise ValueError(
                f"Expected a JSON object from the crew, got {type(survey_dict).__name__}"
            )

        # Pretty-print the original survey with comments
        annotated = survey_dict.get('original_with_comments') or {}
        survey = annotated.get('survey') or {}
        comments = annotated.get('question_comments') or []
        print("\n=== Original Survey (with comments) ===")
        print(f"Theme: {survey.get('theme', '')}")
        print(f"Purpose: {survey.get('purpose', '')}\n")
        for q in survey.get('questions') or []:
            qid = q.get('question_id')
            print(f"Question {qid}: {q.get('question_text')}")
            # First comment whose question_id matches, if any.
            comment = next(
                (c.get('comment') for c in comments if c.get('question_id') == qid),
                None,
            )
            if comment:
                print(f"  Comment: {comment}")
            print()
        overall = annotated.get('overall_comment')
        if overall:
            print(f"Overall comment: {overall}\n")

        # Pretty-print the revised survey; missing keys print as blanks instead
        # of aborting the flow inside display code.
        revised = survey_dict.get('revised_survey') or {}
        print("=== Revised Survey ===")
        print(f"Theme:   {revised.get('theme', '')}")
        print(f"Purpose: {revised.get('purpose', '')}\n")
        for q in revised.get('questions') or []:
            print(f"Q{q.get('question_id')}: {q.get('question_text')}")
            opts = (q.get('input_config') or {}).get('options')
            if opts:
                print("  Options:")
                for o in opts:
                    # Options may be plain strings or {"text": ..., "value": ...} objects.
                    label = o.get('text', o) if isinstance(o, dict) else o
                    print(f"    - {label}")
            print()
        self.state['survey_dict'] = survey_dict
        return survey_dict

        
# Module-level SurveyFlow instance, created at import time.
# NOTE(review): the keyword arguments are the same ones passed to Crew above;
# confirm that the Flow constructor accepts and uses agents/tasks/process/verbose.
flow = SurveyFlow(
    agents=[convert_agent, editor_agent],
    tasks=[convert_task, research_task, improve_task],
    process=Process.sequential,
    verbose=True
)
        
import re

def survey_dict_to_qualtrics_payload(survey_dict: dict) -> dict:
    """Convert the crew's ``survey_dict`` into a Qualtrics v3 survey-definitions payload.

    Only ``survey_dict["revised_survey"]`` is read. Supported ``input_type``
    values: ``multiple_choice``, ``single_choice``, ``slider``, ``text_input``.

    Choice options may be given either as strings (``"Yes"`` or the indexed
    form ``"1=Yes"``) or as ``{"text": ..., "value": ...}`` objects.

    Args:
        survey_dict: parsed crew output containing a ``revised_survey`` mapping
            with ``theme`` (optional) and ``questions``.

    Returns:
        dict with ``SurveyName``, ``Language``, ``ProjectCategory`` and a
        ``Questions`` mapping keyed by ``QID<n>``.

    Raises:
        KeyError: if ``revised_survey``, ``questions`` or a required question
            field is missing.
        ValueError: on an unsupported ``input_type`` or when two questions map
            to the same ``QID``.
    """
    survey_meta = survey_dict["revised_survey"]
    payload = {
        "SurveyName":      survey_meta.get("theme", "New Survey"),
        "Language":        "EN",
        "ProjectCategory": "CORE",
        "Questions":       {}
    }

    for position, q in enumerate(survey_meta["questions"], start=1):
        # "q4" -> "4" -> "QID4". str() tolerates numeric ids; an id without any
        # digit falls back to the question's 1-based position.
        digits = re.sub(r'\D+', '', str(q["question_id"]))
        qid = f"QID{digits or position}"
        if qid in payload["Questions"]:
            # Overwriting here would silently drop a question from the survey.
            raise ValueError(f"Duplicate question id after normalisation: {qid!r}")

        qt = q["question_text"]
        it = q["input_type"]
        cfg = q.get("input_config") or {}

        # ---- fields shared by every question type ----
        qobj = {
            "QuestionText":      qt,
            "DataExportTag":     qid,
            "Configuration":     {"QuestionDescriptionOption": "UseText"},
            "Validation":        {"Settings": {"ForceResponse": "OFF", "Type": "None"}}
        }

        # ---- multiple / single choice ----
        if it in ("multiple_choice", "single_choice"):
            choices = {}
            for opt in cfg.get("options") or []:
                key = str(len(choices) + 1)
                if isinstance(opt, dict):
                    # ChoiceOption-shaped entry: display its text.
                    label = str(opt.get("text", opt.get("value", ""))).strip()
                else:
                    label = str(opt).strip()
                    head, sep, tail = label.partition("=")
                    # Only "<digits>=<label>" is an explicit index; a label such
                    # as "<=5 years" must stay intact.
                    if sep and head.strip().isdigit():
                        key, label = head.strip(), tail.strip()
                choices[key] = {"Display": label}

            qobj.update({
                "QuestionType": "MC",
                # MAVR = multiple answer (vertical), SAVR = single answer (vertical).
                "Selector":     "MAVR" if it == "multiple_choice" else "SAVR",
                "SubSelector":  "TX",
                "Choices":      choices
            })

        # ---- slider ----
        elif it == "slider":
            # Read the slider range defensively (defaults: 0-100, step 1).
            start = cfg.get("min", cfg.get("start", 0))
            end   = cfg.get("max", cfg.get("end", 100))
            step  = cfg.get("step", cfg.get("stepSize", 1))

            # All slider settings are placed at the root of the question object.
            # NOTE(review): confirm against the Qualtrics API that "SL"/"Slider"
            # and the Slider* keys are the accepted type, selector and fields.
            qobj.update({
                "QuestionType": "SL",
                "Selector":     "Slider",
                "SubSelector":  "SL",
                "SliderStart":  start,
                "SliderEnd":    end,
                "SliderStep":   step
            })

        # ---- free text ----
        elif it == "text_input":
            qobj.update({
                "QuestionType": "TE",
                "Selector":     "ML"   # multi-line is used for every text question
            })

        else:
            raise ValueError(f"Unsupported input_type: {it!r}")

        payload["Questions"][qid] = qobj

    return payload


# def survey_dict_to_qualtrics_payload(survey_dict: dict) -> dict:
#     """
#     input survey_dict
#       - theme(for SurveyName)
#       - purpose (Description)
#       - questions: List of { question_id, question_text, input_type, input_config }
#     Qualtrics v3 API 的 survey-definitions JSON
#     """

#     survey_meta = survey_dict["revised_survey"] 
#     payload = {
#         "SurveyName": survey_meta.get("theme", "New Survey"),
#         "Language": "EN",
#         "ProjectCategory": "CORE",
#         "Questions": {}
#     }
#     questions = survey_meta["questions"]
#     for q in questions:
#         qid = q["question_id"]
#         qt = q["question_text"]
#         it = q["input_type"]
#         cfg = q["input_config"]

#         qobj = {
#             "QuestionText": qt,
#             "Configuration": {"QuestionDescriptionOption": "UseText"},
#             "Validation": {"Settings": {"ForceResponse": "OFF", "Type": "None"}}
#         }

#         if it in ("multiple_choice", "single_choice"):
#             choices_dict = {}
#             for opt in cfg["options"]:
#                 if "=" in opt:
#                     index, text = opt.split("=", 1)
#                     choices_dict[index.strip()] = {"Display": text.strip()}
#                 else:
#                     idx = str(len(choices_dict)+1)
#                     choices_dict[idx] = {"Display": opt.strip()}
            
#             qobj.update({
#                 "QuestionType": "MC",
#                 "Selector": "SAVR" if it == "multiple_choice" else "SINGLE",
#                 "SubSelector": "TX",
#                 "Choices": choices_dict
#             })

#         elif it == "slider":
#             qobj.update({
#                 "QuestionType": "SL",
#                 "Selector": "Slider",
#                 "SubSelector": "SL"
#             })
         

#         elif it == "text_input":
#             qobj.update({
#                 "QuestionType": "TE",
#                 "Selector": "ML" if cfg.get("multiline", False) else "TX"
#             })

#         else:
#             raise ValueError(f"Unsupported input_type: {it}")
        
#         payload["Questions"][qid] = qobj

#     return payload

class QualtricsClient:
    """Handles all Qualtrics API interactions"""
    
    def __init__(self):
        """Load Qualtrics credentials from the environment and probe the API.

        Reads ``QUALTRICS_API_TOKEN``, ``QUALTRICS_DATA_CENTER`` and
        ``QUALTRICS_DIRECTORY_ID`` after calling ``load_dotenv``, prints
        diagnostic messages along the way, builds ``base_url``/``headers`` and
        finally issues a best-effort ``whoami`` request whose failure is only
        printed.

        Raises:
            ValueError: if the token or the data center is missing.
        """
        # Diagnostics that help with "wrong working directory" problems.
        print(f"Current working directory: {os.getcwd()}")
        env_notice = (
            "Found .env file in current directory"
            if os.path.exists('.env')
            else "WARNING: No .env file found in current directory!"
        )
        print(env_notice)

        load_dotenv(verbose=True)

        self.api_token = os.getenv('QUALTRICS_API_TOKEN')
        self.data_center = os.getenv('QUALTRICS_DATA_CENTER')
        self.directory_id = os.getenv('QUALTRICS_DIRECTORY_ID')

        # Show only the first and last four characters of the token.
        token = self.api_token
        if not token:
            print("WARNING: No API token found in environment variables!")
        else:
            if len(token) > 8:
                masked_token = f"{token[:4]}{'*' * (len(token) - 8)}{token[-4:]}"
            else:
                masked_token = "****"
            print(f"API Token loaded (masked): {masked_token}")

        if not self.data_center:
            print("WARNING: No data center found in environment variables!")
        else:
            print(f"Data center: {self.data_center}")

        if not (self.api_token and self.data_center):
            raise ValueError("Missing Qualtrics API credentials in .env file")

        # Base URL and headers shared by every request of this client.
        self.base_url = f"https://{self.data_center}.qualtrics.com/API/v3/"
        self.headers = {
            "X-API-Token": self.api_token,
            "Content-Type": "application/json"
        }

        # Connection probe: any failure is reported but never raised.
        print("Testing Qualtrics API connection...")
        try:
            whoami = requests.get(f"{self.base_url}whoami", headers=self.headers)
            if whoami.status_code != 200:
                print(f"Connection test failed with status code: {whoami.status_code}")
                print(f"Response: {whoami.text}")
            else:
                user_info = whoami.json()["result"]
                print(f"Connection successful! Authenticated as: {user_info.get('firstName', '')} {user_info.get('lastName', '')}")
        except Exception as e:
            print(f"Error testing connection: {e}")
        
    def create_survey(self, survey_name, survey_template=None):
        """
        Create a new survey in Qualtrics.

        Args:
            survey_name (str): Name of the survey. Used as ``SurveyName`` of the
                built-in sample survey, and as a fallback when a supplied
                template has no ``SurveyName`` of its own.
            survey_template (dict, optional): Payload to POST to
                ``survey-definitions``. It is never modified; missing
                ``SurveyName``/``ProjectCategory`` keys are filled in on a copy.

        Returns:
            str: Survey ID of the created survey

        Raises:
            Exception: if the API answers with a status other than 200.
        """
        print(f"Creating survey: {survey_name}")

        if not survey_template:
            # Built-in three-question sample survey.
            survey_payload = {
                "SurveyName": survey_name,
                "Language": "EN",
                "ProjectCategory": "CORE",
                "Questions": {
                    "QID1": {
                        "QuestionText": "What is your age?",
                        "QuestionType": "MC",
                        "Selector": "SAVR",
                        "SubSelector": "TX",
                        "Configuration": {"QuestionDescriptionOption": "UseText"},
                        "Validation": {"Settings": {"ForceResponse": "OFF", "Type": "None"}},
                        "Choices": {
                            "1": {"Display": "18-24"},
                            "2": {"Display": "25-34"},
                            "3": {"Display": "35-44"},
                            "4": {"Display": "45-54"},
                            "5": {"Display": "55-64"},
                            "6": {"Display": "65+"}
                        }
                    },
                    "QID2": {
                        "QuestionText": "How satisfied are you with our product?",
                        "QuestionType": "Likert",
                        "Selector": "LSL",
                        "SubSelector": "TX",
                        "Configuration": {"QuestionDescriptionOption": "UseText"},
                        "Validation": {"Settings": {"ForceResponse": "OFF", "Type": "None"}},
                        "Choices": {
                            "1": {"Display": "Very dissatisfied"},
                            "2": {"Display": "Dissatisfied"},
                            "3": {"Display": "Neutral"},
                            "4": {"Display": "Satisfied"},
                            "5": {"Display": "Very satisfied"}
                        }
                    },
                    "QID3": {
                        "QuestionText": "Any additional comments?",
                        "QuestionType": "TE",
                        "Selector": "ML",
                        "Configuration": {"QuestionDescriptionOption": "UseText"},
                        "Validation": {"Settings": {"ForceResponse": "OFF", "Type": "None"}}
                    }
                }
            }
        else:
            # Shallow copy: only top-level keys are added, so the caller's dict
            # is left exactly as it was passed in.
            survey_payload = dict(survey_template)
            survey_payload.setdefault("SurveyName", survey_name)
            survey_payload.setdefault("ProjectCategory", "CORE")

        url = f"{self.base_url}survey-definitions"
        payload = json.dumps(survey_payload)

        # Only the head of the payload is echoed to keep the log readable.
        print(f"Sending payload to Qualtrics: {payload[:200]}...")

        response = requests.post(url, headers=self.headers, data=payload)

        if response.status_code != 200:
            print(f"Error response: {response.text}")
            raise Exception(f"Failed to create survey: {response.text}")

        survey_id = response.json()["result"]["SurveyID"]
        print(f"Survey created successfully with ID: {survey_id}")

        return survey_id

    def add_questions(self, survey_id: str, questions: List[dict]):
        """POST every entry of *questions* to the survey's ``questions`` endpoint.

        Each entry must provide ``question_id``, ``question_text`` and
        ``QuestionType``; ``Selector``, ``SubSelector`` and ``Choices`` are
        forwarded only when present. The status code and JSON body of each
        response are printed; nothing is returned or raised on API errors.
        """
        optional_keys = ("Selector", "SubSelector", "Choices")
        for question in questions:
            # Fields every question carries.
            body = {
                "QuestionID": question["question_id"],
                "QuestionText": question["question_text"],
                "QuestionType": question["QuestionType"],
                "DataExportTag": question["question_id"],
                "Configuration": {"QuestionDescriptionOption": "UseText"},
                "Validation": {"Settings": {"ForceResponse": "OFF", "Type": "None"}},
            }
            # Type-specific fields, copied through unchanged.
            body.update({key: question[key] for key in optional_keys if key in question})

            endpoint = f"{self.base_url}survey-definitions/{survey_id}/questions"
            resp = requests.post(endpoint, headers=self.headers, json=body)
            print(f"POST questions → {resp.status_code}", resp.json())


    # def add_block(self, survey_id: str, block_id: str, question_ids: List[str]):
    #     url = f"{self.base_url}survey-definitions/{survey_id}/blocks"
    #     block_payload = {
    #         "BlockID":       block_id,
    #         "Description":   "All Questions",
    #         "Type":          "Standard",
    #         "DataExportTag": block_id,
    #         "BlockElements": [
    #             {"Type": "Question", "QuestionID": qid}
    #             for qid in question_ids
    #         ]
    #     }
    #     resp = requests.post(url, headers=self.headers, json=block_payload)
    #     print(f"POST blocks → {resp.status_code}", resp.json())


    def add_block(self, survey_id: str, block_payload: dict):
        """POST *block_payload* to the survey's ``blocks`` endpoint and print the response."""
        endpoint = f"{self.base_url}survey-definitions/{survey_id}/blocks"
        reply = requests.post(endpoint, headers=self.headers, json=block_payload)
        print(f"POST blocks → {reply.status_code}", reply.json())



    def update_flow(self, survey_id: str, flow_payload: dict):
        """PUT *flow_payload* to the survey's ``flow`` endpoint and print the response."""
        endpoint = f"{self.base_url}survey-definitions/{survey_id}/flow"
        reply = requests.put(endpoint, headers=self.headers, json=flow_payload)
        print("PUT flow →", reply.status_code, reply.json())
    
    def activate_survey(self, survey_id):
        """
        Mark a survey as active via ``PUT surveys/{survey_id}``.

        Args:
            survey_id (str): ID of the survey to activate

        Returns:
            bool: always True when the call succeeds

        Raises:
            Exception: if the API answers with a status other than 200.
        """
        print(f"Activating survey: {survey_id}")

        body = json.dumps({"isActive": True})
        reply = requests.put(
            f"{self.base_url}surveys/{survey_id}",
            headers=self.headers,
            data=body,
        )
        if reply.status_code != 200:
            raise Exception(f"Failed to activate survey: {reply.text}")

        print("Survey activated successfully")
        return True
    
    def create_distribution_link(self, survey_id, link_type="Anonymous"):
        """
        Create a distribution link for a survey
        
        Args:
            survey_id (str): ID of the survey to distribute
            link_type (str): Type of link (Anonymous or Individual)
            
        Returns:
            str: Distribution link URL
        """
        print(f"Creating distribution link for survey: {survey_id}")
        
        # For anonymous links, we can construct the URL directly based on the standard pattern
        # https://DATACENTERID.qualtrics.com/jfe/form/SURVEYID
        if link_type == "Anonymous":
            survey_link = f"https://{self.data_center}.qualtrics.com/jfe/form/{survey_id}"
            print(f"Anonymous survey link created: {survey_link}")
            return survey_link
        
        # For other distribution types, we would use the API, but that's not implemented yet
        else:
            raise NotImplementedError(f"Distribution type '{link_type}' is not yet supported")
    
    def get_survey_responses(self, survey_id, file_format="csv", poll_interval=2, max_wait=None):
        """
        Export a survey's responses and load them into a DataFrame.

        Starts an export, polls its progress until the status is "complete",
        downloads the resulting zip archive and parses the first member whose
        name ends with ``.<file_format>``.

        Args:
            survey_id (str): ID of the survey
            file_format (str): "csv" or "json" (the only formats parsed here)
            poll_interval (float): seconds to sleep between progress checks
            max_wait (float, optional): give up after roughly this many seconds
                of waiting; None (default) waits indefinitely

        Returns:
            pandas.DataFrame: Survey responses as a DataFrame

        Raises:
            ValueError: if *file_format* is not supported, or the archive
                contains no file of that format.
            TimeoutError: if *max_wait* elapses before the export completes.
            Exception: if any API call returns a non-200 status or the export
                reports the status "failed".
        """
        # Reject unparseable formats before running a whole export for nothing.
        if file_format not in ("csv", "json"):
            raise ValueError(f"Unsupported file format: {file_format}")

        print(f"Downloading responses for survey: {survey_id}")

        # Step 1: Create the export
        export_url = f"{self.base_url}surveys/{survey_id}/export-responses"
        export_payload = json.dumps({
            "format": file_format,
            "useLabels": True
        })
        export_response = requests.post(export_url, headers=self.headers, data=export_payload)
        if export_response.status_code != 200:
            raise Exception(f"Failed to initiate export: {export_response.text}")
        progress_id = export_response.json()["result"]["progressId"]

        # Step 2: Poll until the export is complete. The status, not the
        # percentage, decides: fileId is read right after this loop, and a
        # "failed" export would otherwise be polled forever.
        progress_url = f"{self.base_url}surveys/{survey_id}/export-responses/{progress_id}"
        waited = 0
        while True:
            progress_response = requests.get(progress_url, headers=self.headers)
            if progress_response.status_code != 200:
                raise Exception(f"Failed to check export progress: {progress_response.text}")

            progress_result = progress_response.json()["result"]
            progress_status = progress_result["status"]
            print(f"Export progress: {progress_result.get('percentComplete', 0)}%")

            if progress_status == "complete":
                break
            if progress_status == "failed":
                raise Exception(f"Export failed: {progress_response.text}")
            if max_wait is not None and waited >= max_wait:
                raise TimeoutError(f"Export did not complete within {max_wait} seconds")
            time.sleep(poll_interval)
            waited += poll_interval

        # Step 3: Download the file
        file_id = progress_result["fileId"]
        download_url = f"{self.base_url}surveys/{survey_id}/export-responses/{file_id}/file"
        download_response = requests.get(download_url, headers=self.headers)
        if download_response.status_code != 200:
            raise Exception(f"Failed to download responses: {download_response.text}")

        # Step 4: Extract and parse the zip file
        with zipfile.ZipFile(io.BytesIO(download_response.content)) as zip_file:
            data_file = next(
                (name for name in zip_file.namelist() if name.endswith(f".{file_format}")),
                None,
            )
            if data_file is None:
                raise ValueError(f"No .{file_format} file found in the export archive")
            with zip_file.open(data_file) as file:
                if file_format == "csv":
                    df = pd.read_csv(file)
                else:
                    df = pd.read_json(file)

        print(f"Successfully downloaded {len(df)} responses")
        return df
    ...

class MTurkClient:
    """Handles all MTurk API interactions"""

    def __init__(
        self,
        aws_access_key_id: str = None,
        aws_secret_access_key: str = None,
        use_sandbox: bool = True
        ):
        """Create the boto3 MTurk client.

        Args:
            aws_access_key_id: AWS key id; when either credential argument is
                missing, both are read from ``AWS_ACCESS_KEY_ID`` /
                ``AWS_SECRET_ACCESS_KEY`` instead.
            aws_secret_access_key: AWS secret key (see above).
            use_sandbox: talk to the requester sandbox endpoint (default)
                rather than the production endpoint.

        Raises:
            ValueError: if no complete credential pair is available.
        """
        if aws_access_key_id and aws_secret_access_key:
            self.aws_access_key_id     = aws_access_key_id
            self.aws_secret_access_key = aws_secret_access_key
        else:
            self.aws_access_key_id     = os.getenv('AWS_ACCESS_KEY_ID')
            self.aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
        self.use_sandbox = use_sandbox

        if not self.aws_access_key_id or not self.aws_secret_access_key:
            raise ValueError("Missing AWS credentials")

        # Both endpoints below live in us-east-1, and requests are signed for
        # `region_name`. Taking the region from AWS_REGION would make the two
        # disagree whenever that variable names another region, so it is pinned.
        region = 'us-east-1'
        requested_region = os.getenv('AWS_REGION')
        if requested_region and requested_region != region:
            print(f"Ignoring AWS_REGION={requested_region!r}: MTurk endpoints are in {region}")

        endpoint = (
            'https://mturk-requester-sandbox.us-east-1.amazonaws.com'
            if self.use_sandbox else
            'https://mturk-requester.us-east-1.amazonaws.com'
        )

        self.client = boto3.client(
            'mturk',
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            region_name=region,
            endpoint_url=endpoint
        )
        print(f"MTurk client initialized in {'Sandbox' if self.use_sandbox else 'Production'} mode")
        
    def get_account_balance(self):
        """Get the available MTurk account balance"""
        response = self.client.get_account_balance()
        balance = response['AvailableBalance']
        print(f"MTurk account balance: ${balance}")
        return float(balance)
    
    def create_hit_with_survey_link(self, survey_link, hit_config=None):
        """
        Create an MTurk HIT with a link to a Qualtrics survey
        
        Args:
            survey_link (str): URL to the Qualtrics survey
            hit_config (dict, optional): Custom configuration for the HIT
            
        Returns:
            str: HIT ID
        """
        print("Creating MTurk HIT with survey link")
        
        # Default HIT configuration
        if not hit_config:
            hit_config = {
                'Title': 'Complete a short survey',
                'Description': 'We need your input for a quick survey that should take less than 10 minutes',
                'Keywords': 'survey, research, opinion, feedback',
                'Reward': '0.50',
                'MaxAssignments': 100,
                'LifetimeInSeconds': 86400,  # 1 day
                'AssignmentDurationInSeconds': 1800,  # 30 minutes
                'AutoApprovalDelayInSeconds': 86400,  # 1 day
                'QualificationRequirements': []
            }
        
        # Create the HTML question with the survey link
        question_html = f"""
        <HTMLQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2011-11-11/HTMLQuestion.xsd">
            <HTMLContent><![CDATA[
                <!DOCTYPE html>
                <html>
                <head>
                    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
                    <script type='text/javascript' src='https://s3.amazonaws.com/mturk-public/externalHIT_v1.js'></script>
                </head>
                <body>
                    <form name='mturk_form' method='post' id='mturk_form' action='https://www.mturk.com/mturk/externalSubmit'>
                        <input type='hidden' value='' name='assignmentId' id='assignmentId'/>
                        <h1>Survey Task</h1>
                        <p>Please complete the survey at the following link:</p>
                        <p><a href='{survey_link}' target='_blank'>{survey_link}</a></p>
                        <p>After completing the survey, you will receive a completion code. Enter the code below:</p>
                        <p><input type='text' name='completion_code' id='completion_code' size='40'/></p>
                        <p><input type='submit' id='submitButton' value='Submit' /></p>
                    </form>
                    <script language='Javascript'>
                        turkSetAssignmentID();
                    </script>
                </body>
                </html>
            ]]></HTMLContent>
            <FrameHeight>600</FrameHeight>
        </HTMLQuestion>
        """
        
        # Create the HIT
        response = self.client.create_hit(
            Title=hit_config['Title'],
            Description=hit_config['Description'],
            Keywords=hit_config['Keywords'],
            Reward=hit_config['Reward'],
            MaxAssignments=hit_config['MaxAssignments'],
            LifetimeInSeconds=hit_config['LifetimeInSeconds'],
            AssignmentDurationInSeconds=hit_config['AssignmentDurationInSeconds'],
            AutoApprovalDelayInSeconds=hit_config['AutoApprovalDelayInSeconds'],
            Question=question_html,
            QualificationRequirements=hit_config['QualificationRequirements']
        )
        
        hit_id = response['HIT']['HITId']
        hit_type_id = response['HIT']['HITTypeId']
        
        print(f"HIT created successfully with ID: {hit_id}")
        
        # Print the HIT URL
        if self.use_sandbox:
            worker_url = f"https://workersandbox.mturk.com/mturk/preview?groupId={hit_type_id}"
        else:
            worker_url = f"https://worker.mturk.com/mturk/preview?groupId={hit_type_id}"
            
        print(f"Workers can access the HIT at: {worker_url}")
        
        return hit_id
    
    def get_hit_assignments(self, hit_id):
        """
        Get all assignments for a HIT
        
        Args:
            hit_id (str): ID of the HIT
            
        Returns:
            list: List of assignment dictionaries
        """
        print(f"Getting assignments for HIT: {hit_id}")
        
        # List to store all assignments
        all_assignments = []
        
        # Get assignments with pagination
        next_token = None
        
        while True:
            if next_token:
                response = self.client.list_assignments_for_hit(
                    HITId=hit_id,
                    NextToken=next_token,
                    MaxResults=100
                )
            else:
                response = self.client.list_assignments_for_hit(
                    HITId=hit_id,
                    MaxResults=100
                )
            
            all_assignments.extend(response['Assignments'])
            
            if 'NextToken' in response:
                next_token = response['NextToken']
            else:
                break
        
        print(f"Found {len(all_assignments)} assignments")
        return all_assignments
    
    def approve_assignments(self, assignments, feedback=None):
        """
        Approve multiple assignments
        
        Args:
            assignments (list): List of assignment dictionaries or IDs
            feedback (str, optional): Feedback to workers
            
        Returns:
            int: Number of successfully approved assignments
        """
        approved_count = 0
        
        for assignment in assignments:
            # Extract assignment ID if a dictionary was provided
            assignment_id = assignment['AssignmentId'] if isinstance(assignment, dict) else assignment
            
            try:
                self.client.approve_assignment(
                    AssignmentId=assignment_id,
                    RequesterFeedback=feedback if feedback else "Thank you for your participation!"
                )
                approved_count += 1
            except Exception as e:
                print(f"Error approving assignment {assignment_id}: {str(e)}")
        
        print(f"Successfully approved {approved_count} assignments")
        return approved_count
    
    def delete_hit(self, hit_id):
        """
        Delete a HIT
        
        Args:
            hit_id (str): ID of the HIT to delete
            
        Returns:
            bool: True if successful
        """
        try:
            # Get the HIT status
            hit = self.client.get_hit(HITId=hit_id)
            status = hit['HIT']['HITStatus']
            
            # If the HIT is reviewable, dispose of it
            if status == 'Reviewable':
                self.client.delete_hit(HITId=hit_id)
                print(f"HIT {hit_id} deleted successfully")
                return True
            
            # If the HIT is assignable, expire it first then delete it
            elif status == 'Assignable':
                self.client.update_expiration_for_hit(
                    HITId=hit_id,
                    ExpireAt=datetime(2015, 1, 1)  # Set to a past date to expire immediately
                )
                time.sleep(1)  # Give time for the HIT to update
                self.client.delete_hit(HITId=hit_id)
                print(f"HIT {hit_id} expired and deleted successfully")
                return True
                
            else:
                print(f"Cannot delete HIT {hit_id}, status is {status}")
                return False
                
        except Exception as e:
            print(f"Error deleting HIT {hit_id}: {str(e)}")
            return False


from dotenv import load_dotenv
from typing import Optional

class QualtricsAndMTurkAutomation:
    """
    Publish a survey through the Qualtrics client and recruit respondents
    through the MTurk client, then collect the results.

    Attributes:
        qualtrics: QualtricsClient instance used for survey operations.
        mturk: MTurkClient instance used for HIT and assignment operations.
    """

    # Text of the closing descriptive block. The ${e://Field/ResponseID} token
    # is sent verbatim; presumably Qualtrics substitutes the response ID when
    # the survey is rendered (TODO confirm against Qualtrics piped-text docs).
    _COMPLETION_TEXT = (
        "Thank you for completing the survey!\n"
        "Your completion code is: ${e://Field/ResponseID}"
    )

    def __init__(self, mturk_client: Optional["MTurkClient"] = None):
        """
        Load environment variables and set up the two service clients.

        Args:
            mturk_client: Pre-configured MTurk client to use. When omitted
                (or falsy), a default ``MTurkClient()`` is created.
        """
        load_dotenv()
        self.qualtrics = QualtricsClient()
        self.mturk = mturk_client or MTurkClient()

    @staticmethod
    def _normalize_question_id(raw_id: str) -> str:
        """
        Return the ``QID<n>`` form of a payload question key.

        Accepts both ``"Q1"`` and ``"QID1"`` style keys. The previous
        ``lstrip("Q")`` turned ``"QID1"`` into ``"QIDID1"`` because it only
        removes leading ``Q`` characters.
        """
        if raw_id.startswith("QID"):
            suffix = raw_id[len("QID"):]
        else:
            suffix = raw_id.lstrip("Q")
        return f"QID{suffix}"

    @staticmethod
    def _build_question(qid: str, qobj: dict) -> dict:
        """Build the question dict passed to ``add_questions`` for one payload entry."""
        real_qid = QualtricsAndMTurkAutomation._normalize_question_id(qid)
        q_data = {
            "question_id":   real_qid,
            "question_text": qobj["QuestionText"],
            "QuestionID":    real_qid,
            "QuestionText":  qobj["QuestionText"],
            "QuestionType":  qobj["QuestionType"],
            "Selector":      qobj["Selector"],
        }
        # These fields are not present on every question, so copy them
        # only when the payload provides them.
        for optional_key in ("SubSelector", "Choices"):
            if optional_key in qobj:
                q_data[optional_key] = qobj[optional_key]
        return q_data

    def run(self, survey_payload: dict, hit_config: dict) -> dict:
        """
        Create and activate the survey, then post a HIT that links to it.

        Args:
            survey_payload: Dict with at least ``"SurveyName"`` and
                ``"Questions"`` (mapping of question key -> question dict
                containing ``QuestionText``, ``QuestionType``, ``Selector``
                and optionally ``SubSelector`` / ``Choices``).
            hit_config: HIT settings forwarded unchanged to
                ``create_hit_with_survey_link``.

        Returns:
            dict: ``{"survey_id": ..., "survey_link": ..., "hit_id": ...}``.

        Raises:
            KeyError: If a required key is missing from ``survey_payload``.
        """
        survey_id = self.qualtrics.create_survey(
            survey_name=survey_payload["SurveyName"],
            survey_template=survey_payload,
        )

        questions = [
            self._build_question(qid, qobj)
            for qid, qobj in survey_payload["Questions"].items()
        ]
        self.qualtrics.add_questions(survey_id, questions)

        # Append a final descriptive block that shows the completion code;
        # its ID is numbered right after the last survey question.
        comp_qid = f"QID{len(questions) + 1}"
        self.qualtrics.add_questions(survey_id, [{
            "question_id":   comp_qid,
            "question_text": self._COMPLETION_TEXT,
            "QuestionID":    comp_qid,
            "QuestionText":  self._COMPLETION_TEXT,
            "QuestionType":  "DB",   # Descriptive Text
            "Selector":      "TB"    # Text/Graphic Block
        }])

        self.qualtrics.activate_survey(survey_id)

        survey_link = self.qualtrics.create_distribution_link(survey_id)

        hit_id = self.mturk.create_hit_with_survey_link(survey_link, hit_config)

        return {
            "survey_id":   survey_id,
            "survey_link": survey_link,
            "hit_id":      hit_id
        }

    def collect_and_process_results(self, survey_id, hit_id, auto_approve=True):
        """
        Download survey responses, save them to CSV, and handle assignments.

        This is best-effort: if any step raises, the error is printed and
        whatever was gathered so far is returned.

        Args:
            survey_id: ID of the survey whose responses are downloaded.
            hit_id: ID of the HIT whose assignments are fetched.
            auto_approve: When True and assignments exist, approve them.

        Returns:
            dict: May contain ``responses``, ``csv_filename``,
                ``assignments`` and ``approved_count``; contains ``error``
                (the exception message) when a step failed.
        """
        results = {}

        try:
            responses_df = self.qualtrics.get_survey_responses(survey_id)
            results['responses'] = responses_df

            # Timestamp the file name so repeated runs do not overwrite
            # earlier exports.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            csv_filename = f"survey_responses_{timestamp}.csv"
            responses_df.to_csv(csv_filename, index=False)
            results['csv_filename'] = csv_filename

            print(f"Saved {len(responses_df)} responses to {csv_filename}")

            assignments = self.mturk.get_hit_assignments(hit_id)
            results['assignments'] = assignments

            if auto_approve and assignments:
                approved_count = self.mturk.approve_assignments(assignments)
                results['approved_count'] = approved_count

            return results

        except Exception as e:
            print(f"Error collecting results: {str(e)}")
            # Record the failure so callers can tell a partial result apart
            # from a complete one.
            results['error'] = str(e)
            return results



import asyncio

if __name__ == "__main__":
    # survey_to_process = """
    # Topic: The Theory of Planned Behavior Survey
    # Questions:
    # 1. I intend to purchase organic food in the next month. (1=Strongly disagree; 7=Strongly agree)
    # 2. Buying organic food is beneficial to my health. (1=Strongly disagree; 7=Strongly agree)
    # 3. I feel confident in my ability to purchase organic food if I want to. (1=Strongly disagree; 7=Strongly agree)
    # 4. How do I think of the organic food (text question)
    # 5. The decision to buy organic food is entirely up to me. (1=Strongly disagree; 7=Strongly agree)
    # 6. What barriers, if any, prevent you from buying organic food? (text question)
    # 7. How do elders think of organic food? (text question)
    # """
    print("========================================")
    print("⚠️  INPUT REQUIREMENTS:")
    print("- You must include a line starting with 'Topic:'")
    print("- You must include at least one line starting with 'Questions:'")
    print("Otherwise, the survey cannot be processed.")
    print("========================================")
    survey_to_process = input("Please enter the Survey content: ")

    survey_dict = await flow.kickoff_async(inputs={
        'survey_text': survey_to_process
    })

    annotated = survey_dict['original_with_comments']
    revised   = survey_dict['revised_survey'] 

    survey_dict = await flow.kickoff_async(inputs={
        'survey_text': survey_to_process
    })

    
    qualtrics_payload = survey_dict_to_qualtrics_payload(survey_dict)

    aws_key    = 
    aws_secret = 
    mturk = MTurkClient(aws_key, aws_secret, use_sandbox=True)

    hit_config = {
        'Title': 'Complete a short survey on organic food',
        'Description': survey_dict["revised_survey"]["purpose"],
        'Keywords': 'survey, research, feedback',
        'Reward': '0.75',
        'MaxAssignments': 100,
        'LifetimeInSeconds': 86400,
        'AssignmentDurationInSeconds': 1800,
        'AutoApprovalDelayInSeconds': 86400,
        'QualificationRequirements': []
    }
    
    automation = QualtricsAndMTurkAutomation()
    results = automation.run(qualtrics_payload, hit_config)

/Users/princess/.local/lib/python3.12/site-packages/pydantic/_internal/_config.py:295: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/


TypeError: Flow.__init__() got an unexpected keyword argument 'agents'

In [None]:
# Report cell: print the annotated original survey, then the revised survey.
print("\n=== Original Survey (with comments) ===")
original_survey = annotated['survey']
print(f"Theme:   {original_survey['theme']}")
print(f"Purpose: {original_survey['purpose']}\n")
for question in original_survey['questions']:
    print(f"Q{question['question_id']}: {question['question_text']}")
    # Pick the first comment attached to this question, if there is one.
    for entry in annotated['question_comments']:
        if entry['question_id'] == question['question_id']:
            remark = entry['comment']
            break
    else:
        remark = None
    if remark:
        print(f"  → Comment: {remark}")
    print()

overall_remark = annotated.get('overall_comment')
if overall_remark:
    print(f"Overall comment: {overall_remark}\n")

print("=== Revised Survey ===")
print(f"Theme:   {revised['theme']}")
print(f"Purpose: {revised['purpose']}\n")
for question in revised['questions']:
    print(f"Q{question['question_id']}: {question['question_text']}")
    # Only choice-style input configs carry an 'options' entry.
    choices = question['input_config'].get('options')
    if choices:
        print("  Options:")
        for choice in choices:
            print(f"    - {choice}")
    print()


=== Original Survey (with comments) ===
Theme:   The Theory of Planned Behavior Survey
Purpose: To assess consumer intentions and perceptions regarding organic food purchasing.

Qq1: I intend to purchase organic food in the next month.
  → Comment: Measures behavioral intentions effectively.

Qq2: Buying organic food is beneficial to my health.
  → Comment: Assesses important health beliefs regarding organic food.

Qq3: I feel confident in my ability to purchase organic food if I want to.
  → Comment: Focuses on self-efficacy, aligning with TPB.

Qq4: How do I think of the organic food?
  → Comment: Allows for qualitative insights into personal beliefs.

Qq5: The decision to buy organic food is entirely up to me.
  → Comment: Reiterates consumer autonomy in purchasing decisions.

Qq6: What barriers, if any, prevent you from buying organic food?
  → Comment: Identifies barriers that may inhibit organic food purchases.

Qq7: How do elders think of organic food?
  → Comment: Explores gen

In [None]:
# Collection cell: pull the IDs produced by the publishing run, then download
# responses and process the HIT's assignments.
survey_id, hit_id = results['survey_id'], results['hit_id']

print(f"Ready to collect data for Survey ID: {survey_id} and HIT ID: {hit_id}")

collected_data = automation.collect_and_process_results(
    survey_id=survey_id, hit_id=hit_id, auto_approve=True
)

print("Data collection completed. Summary:")
print(collected_data)

# 'responses' is only present when the response download succeeded.
if 'responses' not in collected_data:
    print("No responses collected.")
else:
    display(collected_data['responses'])

Ready to collect data for Survey ID: SV_5hXqPl4yhqXzBhY and HIT ID: 3LAZVA75O8O8VDMMCS3XOZDHD98O24
Downloading responses for survey: SV_5hXqPl4yhqXzBhY
Export progress: 0.0%
Export progress: 0.0%
Export progress: 100.0%
Successfully downloaded 3 responses
Saved 3 responses to survey_responses_20250507_104325.csv
Getting assignments for HIT: 3LAZVA75O8O8VDMMCS3XOZDHD98O24
Found 1 assignments
Successfully approved 1 assignments
Data collection completed. Summary:
{'responses':                                  StartDate  \
0                               Start Date   
1  {"ImportId":"startDate","timeZone":"Z"}   
2                      2025-05-07 14:42:05   

                                 EndDate                 Status  \
0                               End Date          Response Type   
1  {"ImportId":"endDate","timeZone":"Z"}  {"ImportId":"status"}   
2                    2025-05-07 14:42:18             IP Address   

                  IPAddress                 Progress    Duration (

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,RecipientLastName,...,LocationLongitude,DistributionChannel,UserLanguage,QIDID1,QIDID2,QIDID3,QIDID4,QIDID5,QIDID6,QIDID7
0,Start Date,End Date,Response Type,IP Address,Progress,Duration (in seconds),Finished,Recorded Date,Response ID,Recipient Last Name,...,Location Longitude,Distribution Channel,User Language,To what extent do you plan to purchase organic...,"In your opinion, how beneficial is buying orga...",How confident are you in your ability to purch...,Please share your thoughts on organic food.,Do you feel that the decision to purchase orga...,"What obstacles, if any, prevent you from buyin...",What are the common perceptions among elders r...
1,"{""ImportId"":""startDate"",""timeZone"":""Z""}","{""ImportId"":""endDate"",""timeZone"":""Z""}","{""ImportId"":""status""}","{""ImportId"":""ipAddress""}","{""ImportId"":""progress""}","{""ImportId"":""duration""}","{""ImportId"":""finished""}","{""ImportId"":""recordedDate"",""timeZone"":""Z""}","{""ImportId"":""_recordId""}","{""ImportId"":""recipientLastName""}",...,"{""ImportId"":""locationLongitude""}","{""ImportId"":""distributionChannel""}","{""ImportId"":""userLanguage""}","{""ImportId"":""QID1""}","{""ImportId"":""QID2""}","{""ImportId"":""QID3""}","{""ImportId"":""QID4_TEXT""}","{""ImportId"":""QID5""}","{""ImportId"":""QID6_TEXT""}","{""ImportId"":""QID7_TEXT""}"
2,2025-05-07 14:42:05,2025-05-07 14:42:18,IP Address,216.165.95.164,100,12,True,2025-05-07 14:42:19,R_1NRsFphj2wLOdkq,,...,-73.9904,anonymous,EN,3,3,6,,,,
