# Survey Distribution & Collection Tool

In [18]:
import os
import boto3
import xml.etree.ElementTree as ET
from botocore.config import Config

# AWS credentials must already be present in the environment (shell export,
# a .env file loaded beforehand, or a secrets manager). They are deliberately
# not assigned here so that keys never end up saved inside the notebook.
_missing_aws_vars = [
    name for name in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')
    if not os.environ.get(name)
]
if _missing_aws_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_aws_vars)
    )
# Keep AWS_REGION defined for later cells that read it, without overriding a
# value the user exported on purpose.
os.environ.setdefault('AWS_REGION', 'us-east-1')

# The client is signed for us-east-1 to match the endpoint host below.
config = Config(region_name='us-east-1', signature_version='v4')
mturk = boto3.client(
    'mturk',
    config=config,
    aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
    aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
    # Requester *sandbox* endpoint, not the production marketplace.
    endpoint_url='https://mturk-requester-sandbox.us-east-1.amazonaws.com'
)

In [20]:
def create_survey_hit(survey_link: str,
                      reward: str = "0.01",
                      max_assignments: int = 1) -> str:
    """Publish an ExternalQuestion HIT that embeds `survey_link` and return its id.

    Args:
        survey_link: URL of the survey to show inside the HIT frame. Pass the
            raw URL; it is XML-escaped here, so an already-escaped URL
            (containing ``&amp;``) would be escaped twice.
        reward: Reward per assignment, forwarded to MTurk as a string.
        max_assignments: Number of assignments requested for the HIT.

    Returns:
        The ``HITId`` reported by ``mturk.create_hit``.
    """
    # Local import keeps this cell self-contained; only stdlib is used.
    from xml.sax.saxutils import escape

    # Escape the URL: a bare "&" (common in query strings), "<" or ">" would
    # otherwise make the ExternalQuestion document malformed XML.
    xml_question = f"""<?xml version="1.0" encoding="UTF-8"?>
<ExternalQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd">
  <ExternalURL>{escape(survey_link)}</ExternalURL>
  <FrameHeight>600</FrameHeight>
</ExternalQuestion>"""
    resp = mturk.create_hit(
        Title="Sandbox Survey Test",
        Description="请在此 HIT 中打开问卷并填写",
        Reward=reward,
        MaxAssignments=max_assignments,
        LifetimeInSeconds=3600,             # HIT stays listed for one hour
        AssignmentDurationInSeconds=1800,   # each worker gets 30 minutes
        Question=xml_question
    )
    hit_id = resp['HIT']['HITId']
    print(f"Created HIT: {hit_id}")
    return hit_id

# Qualtrics survey URL that will be embedded in the HIT.
survey_link = "https://qualtricsxmnznfsjncm.qualtrics.com/jfe/form/SV_00tpW4sa8vcRYma"
# Creates the HIT right away; `hit_id` is reused by the assignment-listing cell below.
hit_id = create_survey_hit(survey_link)

Created HIT: 34XASH8KMGJGNH2G1AZWS8W5TGIMP1


In [22]:
# Manual checkpoint: the message asks the operator to accept and submit the HIT
# with a sandbox worker account; input() then blocks the cell until Enter is pressed.
print("\n👉 请在 https://workersandbox.mturk.com/ 用 Sandbox Worker 账号领取并提交该 HIT 后，按回车继续...")
input()


👉 请在 https://workersandbox.mturk.com/ 用 Sandbox Worker 账号领取并提交该 HIT 后，按回车继续...


 


''

In [5]:
def list_assignments(hit_id: str):
    """Return every Submitted/Approved/Rejected assignment of a HIT.

    The MTurk API returns results in pages; this follows ``NextToken`` until
    the last page so that HITs with more assignments than one page holds are
    not silently truncated.

    Args:
        hit_id: Id of the HIT whose assignments should be listed.

    Returns:
        A list of assignment dicts, in the order the API returned them
        (empty when nothing has been submitted yet).
    """
    request = {
        'HITId': hit_id,
        'AssignmentStatuses': ['Submitted', 'Approved', 'Rejected'],
        'MaxResults': 100,
    }
    assignments = []
    while True:
        resp = mturk.list_assignments_for_hit(**request)
        assignments.extend(resp.get('Assignments', []))
        next_token = resp.get('NextToken')
        if not next_token:
            return assignments
        # Ask for the page that follows the one just received.
        request['NextToken'] = next_token

def parse_external_answer(answer_xml: str) -> dict:
    """Parse an assignment's answer XML into ``{question_id: free_text}``.

    MTurk delivers answers as a namespaced ``QuestionFormAnswers`` document
    whose entries are ``<Answer>`` elements, so a plain
    ``findall('.//AnswerField')`` never matches real submissions. Elements are
    therefore matched by local name, ignoring any namespace; un-namespaced
    ``<AnswerField>`` entries are still accepted for backward compatibility.

    Args:
        answer_xml: The XML string found under an assignment's ``Answer`` key.

    Returns:
        Mapping of each entry's ``QuestionIdentifier`` text to its ``FreeText``
        text. A missing child yields ``None``; an empty child yields ``''``.
    """
    def _local_name(tag) -> str:
        # "{namespace}Name" -> "Name"; non-string tags (comments, PIs) -> "".
        return tag.rsplit('}', 1)[-1] if isinstance(tag, str) else ''

    def _child_text(parent, name):
        # Same contract as Element.findtext: first matching direct child wins,
        # empty text is reported as '' and a missing child as None.
        for child in parent:
            if _local_name(child.tag) == name:
                return child.text or ''
        return None

    root = ET.fromstring(answer_xml)
    result = {}
    for element in root.iter():
        if element is root:
            continue  # only descendants are answer entries
        if _local_name(element.tag) in ('Answer', 'AnswerField'):
            qid = _child_text(element, 'QuestionIdentifier')
            result[qid] = _child_text(element, 'FreeText')
    return result

# Fetch whatever has been submitted for the HIT so far and print one summary
# (assignment id, worker id, parsed answers) per assignment.
assignments = list_assignments(hit_id)
if assignments:
    for submission in assignments:
        print("――― Assignment ID:", submission['AssignmentId'])
        print("    Worker ID:    ", submission['WorkerId'])
        print("    Answer：", parse_external_answer(submission['Answer']))
else:
    print("⚠️ 尚未收到任何提交。")

⚠️ 尚未收到任何提交。


In [7]:
# Attempt at achieving this with agents (draft kept for reference; everything below is commented out)

# publish_tool = PublishSurveyTool()
# fetch_tool   = FetchSurveyTool()

# deploy_agent  = Agent(config=deploy_cfg,  tools=[publish_tool])
# collect_agent = Agent(config=collect_cfg, tools=[fetch_tool])

# post_task = Task(
#     config=post_cfg,
#     agent=deploy_agent,
#     run=publish_tool.run
# )
# get_task = Task(
#     config=get_cfg,
#     agent=collect_agent,
#     run=fetch_tool.run
# )

# if __name__ == "__main__":
#     crew = Crew(
#         agents=[deploy_agent, collect_agent],
#         tasks =[post_task, get_task],
#         verbose=True
#     )

#     survey_definition = {
#         "title": "Customer Satisfaction Survey",
#         "fields": [
#             {
#                 "title": "How satisfied are you with our service?",
#                 "type": "multiple_choice",
#                 "properties": {
#                     "choices": [
#                         {"label": "Very Satisfied"},
#                         {"label": "Satisfied"},
#                         {"label": "Neutral"},
#                         {"label": "Dissatisfied"}
#                     ]
#                 }
#             }
#         ]
#     }

#     direct_pub = publish_tool.run(survey_definition)
#     print("Direct publish output:", direct_pub)
#     fid = direct_pub["id"]
#     direct_fetch = fetch_tool.run(fid)
#     print("Direct fetch output:", direct_fetch)

#     # print("\n=== Using Crew.kickoff ===")
#     # deploy_out  = crew.kickoff(inputs={"survey_definition": survey_definition})
#     # print("Crew deployed:", deploy_out)
#     # fid = deploy_out["id"]
#     # collect_out = crew.kickoff(inputs={"survey_id": fid})
#     # print("Crew collected:", collect_out)


In [5]:
import os
import yaml
import json
import warnings
from datetime import datetime
from typing import List, Union, Literal, Optional, Dict, Any

from pydantic import BaseModel
from crewai import Agent, Task, Crew, Flow, Process
from crewai.flow.flow import start
from crewai.tasks.task_output import OutputFormat

import requests
import zipfile
import io
import time
import pandas as pd
import boto3
from dotenv import load_dotenv
import logging
# Only let ERROR-and-above records through for these two loggers.
logging.getLogger("opentelemetry").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
# Presumably disables OpenTelemetry trace export for libraries that honour this
# variable — TODO confirm it is set early enough to take effect.
os.environ["OTEL_TRACES_EXPORTER"] = "none"

# ========== Pydantic Models ==========
class ChoiceOption(BaseModel):
    # One entry of ChoiceConfig.options; both fields are required strings.
    text: str
    value: str

class ChoiceConfig(BaseModel):
    # Input configuration that carries a list of ChoiceOption entries.
    options: List[ChoiceOption]

class SliderConfig(BaseModel):
    # Input configuration with three required floats: min, max and step.
    min: float
    max: float
    step: float
    
class MatrixRowOption(BaseModel):
    # One entry of MatrixConfig.rows (same shape as ChoiceOption).
    text: str
    value: str

class MatrixColumnOption(BaseModel):
    # One entry of MatrixConfig.columns (same shape as ChoiceOption).
    text: str
    value: str

class MatrixConfig(BaseModel):
    # Input configuration made of a list of rows and a list of columns.
    rows: List[MatrixRowOption]
    columns: List[MatrixColumnOption]

class TextInputConfig(BaseModel):
    # Input configuration where every field has a default, so an empty object validates.
    placeholder: Optional[str] = None
    multiline: bool = False

class Question(BaseModel):
    # A single survey question: identifier, text, input type and its configuration.
    question_id: str
    question_text: str
    # Closed set of accepted input type names.
    input_type: Literal["multiple_choice", "single_choice", "slider", "text_input", "matrix", "text_entry", "descriptive_text", "rank_order", "side_by_side", "drill_down", "meta_info", "constant_sum", "timing", "heat_map", "net_promoter", "graphic_slider"]
    # NOTE(review): nothing here ties the config variant to input_type — confirm
    # the union resolves to the intended model for each type.
    input_config: Union[ChoiceConfig, SliderConfig, TextInputConfig, MatrixConfig]

class Survey(BaseModel):
    # A whole survey: theme, purpose and the ordered list of questions.
    theme: str
    purpose: str
    questions: List[Question]

class QuestionComment(BaseModel):
    # A comment attached to one question, referenced by its question_id.
    question_id: str
    comment: str

class AnnotatedSurvey(BaseModel):
    # A survey together with per-question comments and an optional overall comment.
    survey: Survey
    question_comments: List[QuestionComment]
    # Explicit default: under Pydantic v2 a bare Optional[str] is still a
    # *required* field, so payloads without an overall comment would be rejected.
    overall_comment: Optional[str] = None

class SurveyImprovementResult(BaseModel):
    # Pairs the annotated original survey with its revised version.
    original_with_comments: AnnotatedSurvey
    revised_survey: Survey

# ========== Utility to load YAML ==========
def load_yaml(path: str) -> dict:
    """Read the UTF-8 file at `path` and return what ``yaml.safe_load`` parses from it."""
    with open(path, mode='r', encoding='utf-8') as handle:
        parsed = yaml.safe_load(handle)
    return parsed

# ========== Agent Definitions ==========
def load_agent_config(yaml_path, agent_key, default_config=None):
    """Safely load agent configuration with fallback to default.

    Args:
        yaml_path: Path of the YAML file to read.
        agent_key: Top-level key whose value is the agent's configuration.
        default_config: Returned when the file is missing, is empty / not a
            mapping, or does not contain `agent_key`.

    Returns:
        ``config[agent_key]`` when available, otherwise ``default_config``
        (or ``{}`` when no default was given). A warning is printed whenever
        the fallback is used.
    """
    try:
        config = load_yaml(yaml_path)
    except FileNotFoundError:
        print(f"Warning: {yaml_path} not found. Using default configuration.")
        return default_config or {}

    # An empty YAML file parses to None (and other documents may parse to a
    # list or scalar); only a mapping can hold the requested key, so anything
    # else falls through to the default instead of raising TypeError.
    if isinstance(config, dict) and agent_key in config:
        return config[agent_key]

    print(f"Warning: {agent_key} not found in {yaml_path}. Using default configuration.")
    return default_config or {}

# Default agent configurations
# Fallback settings used when the convert agent's YAML file or key is unavailable.
default_convert_agent_config = {
    "role": "Survey Conversion Assistant",
    "goal": "Convert survey text to structured JSON format",
    "backstory": "Expert at parsing survey data",
    "verbose": True,
    "allow_delegation": False
}

# Fallback settings used when the editor agent's YAML file or key is unavailable.
default_editor_agent_config = {
    "role": "Survey Editor",
    "goal": "Improve survey quality and structure",
    "backstory": "Expert at survey design and improvement",
    "verbose": True,
    "allow_delegation": False
}

# Load agent configurations with fallbacks
# NOTE: the five keys indexed below must all be present in whichever
# configuration is returned; a YAML entry that omits one raises KeyError.
conv_cfg = load_agent_config("config/agents/survey_convert_agent.yaml", "survey_convert_agent", default_convert_agent_config)
convert_agent = Agent(
    name="survey_convert_agent",
    role=conv_cfg["role"],
    goal=conv_cfg["goal"],
    backstory=conv_cfg["backstory"],
    verbose=conv_cfg["verbose"],
    allow_delegation=conv_cfg["allow_delegation"]
)

edit_cfg = load_agent_config("config/agents/survey_editor.yaml", "survey_editor", default_editor_agent_config)
editor_agent = Agent(
    name="survey_editor_agent",
    role=edit_cfg["role"],
    goal=edit_cfg["goal"],
    backstory=edit_cfg["backstory"],
    verbose=edit_cfg["verbose"],
    allow_delegation=edit_cfg["allow_delegation"]
)

# ========== Task Definitions ==========
def load_task_config(yaml_path, task_key, default_config=None):
    """Safely load task configuration with fallback to default.

    Args:
        yaml_path: Path of the YAML file to read.
        task_key: Top-level key whose value is the task's configuration.
        default_config: Returned when the file is missing, is empty / not a
            mapping, or does not contain `task_key`.

    Returns:
        ``config[task_key]`` when available, otherwise ``default_config``
        (or ``{}`` when no default was given). A warning is printed whenever
        the fallback is used.
    """
    try:
        config = load_yaml(yaml_path)
    except FileNotFoundError:
        print(f"Warning: {yaml_path} not found. Using default configuration.")
        return default_config or {}

    # An empty YAML file parses to None (and other documents may parse to a
    # list or scalar); only a mapping can hold the requested key, so anything
    # else falls through to the default instead of raising TypeError.
    if isinstance(config, dict) and task_key in config:
        return config[task_key]

    print(f"Warning: {task_key} not found in {yaml_path}. Using default configuration.")
    return default_config or {}

# Default task configurations
# Fallback used when the convert task's YAML file or key is unavailable.
default_convert_config = {
    "description": "Convert survey text to JSON format",
    "expected_output": "JSON-formatted survey data",
    "inputs": {},
    "outputs": {}
}

# Fallback used when the research task's YAML file or key is unavailable.
default_research_config = {
    "description": "Research survey best practices and make recommendations",
    "expected_output": "Survey enhancement recommendations",
    "inputs": {},
    "outputs": {}
}

# Fallback used when the improve task's YAML file or key is unavailable.
default_improve_config = {
    "description": "Apply survey improvements based on recommendations",
    "expected_output": "Improved survey JSON",
    "inputs": {},
    "outputs": {}
}

# Load task configurations with fallbacks
# For every task, only the key names of the "inputs"/"outputs" mappings are
# forwarded (as lists); "description" and "expected_output" are indexed directly
# and must be present in the loaded configuration.
conv_t = load_task_config("config/tasks/convert_survey_to_json.yaml", "convert_survey_to_json", default_convert_config)
convert_task = Task(
    name="convert_survey_to_json",
    description=conv_t["description"],
    agent=convert_agent,
    tool=conv_t.get("tool"),  # None unless the YAML entry names a tool
    inputs=list(conv_t.get("inputs", {}).keys()),
    outputs=list(conv_t.get("outputs", {}).keys()),
    expected_output=conv_t["expected_output"],
    output_format=OutputFormat.JSON
)

# NOTE(review): research_task is read from apply_survey_enhancements.yaml and is
# assigned to convert_agent rather than a dedicated agent — confirm this is intended.
res_t = load_task_config("config/tasks/apply_survey_enhancements.yaml", "research_task", default_research_config)
research_task = Task(
    name="research_task",
    description=res_t["description"],
    agent=convert_agent,
    inputs=list(res_t.get("inputs", {}).keys()),
    outputs=list(res_t.get("outputs", {}).keys()),
    expected_output=res_t["expected_output"],
    output_format=OutputFormat.JSON
)

imp_t = load_task_config("config/tasks/apply_survey_enhancements.yaml", "improve_survey", default_improve_config)
improve_task = Task(
    name="improve_survey",
    description=imp_t["description"],
    agent=editor_agent,
    inputs=list(imp_t.get("inputs", {}).keys()),
    outputs=list(imp_t.get("outputs", {}).keys()),
    expected_output=imp_t["expected_output"],
    output_format=OutputFormat.JSON
)

# ========== Crew Definition ==========
# Crew that runs the three tasks in the listed order (sequential process):
# convert -> research -> improve. SurveyFlow.initial_run kicks this crew off.
survey_crew = Crew(
    agents=[convert_agent, editor_agent],
    tasks=[convert_task, research_task, improve_task],
    process=Process.sequential,
    verbose=True
)

# ========== Flow Definition ==========
def _strip_code_fence(raw: str) -> str:
    """Return `raw` stripped of whitespace and of a surrounding ``` fence, if any."""
    text = raw.strip()
    if not (text.startswith('```') and text.endswith('```')):
        return text
    if '\n' in text:
        # Multi-line fence: drop the opening line (it may carry a language tag
        # such as ```json) and everything from the closing fence onwards.
        return text.split('\n', 1)[1].rsplit('```', 1)[0]
    # Single-line fence such as ```{...}```: peel the backticks off both ends.
    return text[3:-3] if len(text) >= 6 else ''


def _print_annotated_survey(annotated: dict) -> None:
    """Print the original survey, each question followed by its comment if one exists."""
    survey = annotated.get('survey') or {}
    comments = annotated.get('question_comments') or []

    # First comment per question id wins; built once instead of rescanning
    # the comment list for every question.
    comment_by_qid = {}
    for entry in comments:
        comment_by_qid.setdefault(entry.get('question_id'), entry.get('comment'))

    print("\n=== Original Survey (with comments) ===")
    print(f"Theme: {survey.get('theme', '')}")
    print(f"Purpose: {survey.get('purpose', '')}\n")
    for q in survey.get('questions') or []:
        qid = q.get('question_id')
        print(f"Question {qid}: {q.get('question_text')}")
        comment = comment_by_qid.get(qid)
        if comment:
            print(f"  Comment: {comment}")
        print()
    overall = annotated.get('overall_comment')
    if overall:
        print(f"Overall comment: {overall}\n")


def _print_revised_survey(revised: dict) -> None:
    """Print the revised survey, listing the options of questions that have them."""
    print("=== Revised Survey ===")
    print(f"Theme:   {revised.get('theme', '')}")
    print(f"Purpose: {revised.get('purpose', '')}\n")
    for q in revised.get('questions') or []:
        print(f"Q{q.get('question_id')}: {q.get('question_text')}")
        opts = (q.get('input_config') or {}).get('options')
        if opts:
            print("  Options:")
            for o in opts:
                print(f"    - {o}")
        print()


class SurveyFlow(Flow):
    """Flow with a single start step that runs `survey_crew` and reports its result."""

    @start()
    def initial_run(self):
        """Run the crew on ``state['survey_text']`` and return the parsed JSON result.

        The first line of the survey text (minus a ``Topic:`` marker) is passed
        to the crew as the topic, together with the full text and the current
        year. The crew's raw output is stripped of a Markdown code fence,
        parsed as JSON, pretty-printed and stored in ``state['survey_dict']``.

        Returns:
            dict: The parsed crew output.

        Raises:
            ValueError: If the survey text is empty, or the crew output is not
                valid JSON or not a JSON object.
        """
        survey_text = self.state['survey_text'].strip()
        if not survey_text:
            # Fail before the crew run instead of with an IndexError below.
            raise ValueError("survey_text is empty; nothing to convert")
        first_line = survey_text.splitlines()[0]
        topic = first_line.replace('Topic:', '').strip()
        current_year = datetime.now().year

        crew_result = survey_crew.kickoff(
            inputs={
                'survey_text': survey_text,
                'topic': topic,
                'current_year': current_year
            }
        )

        raw = _strip_code_fence(crew_result.raw)
        try:
            survey_dict = json.loads(raw)
        except json.JSONDecodeError as e:
            raise ValueError(f"解析 JSON 失败: {e}\nRaw output:\n{raw}")
        if not isinstance(survey_dict, dict):
            raise ValueError(
                f"Expected a JSON object from the crew, got {type(survey_dict).__name__}"
            )

        # Printing is best-effort: missing keys print as blanks so that a
        # partial result is still stored and returned after the crew run.
        _print_annotated_survey(survey_dict.get('original_with_comments') or {})
        _print_revised_survey(survey_dict.get('revised_survey') or {})

        self.state['survey_dict'] = survey_dict
        return survey_dict

        
# Instantiate the flow. initial_run reads state['survey_text'], so that value
# has to be supplied before/when the flow is started.
# NOTE(review): these keyword arguments mirror the Crew(...) call above; confirm
# that Flow's constructor accepts them and what it does with them.
flow = SurveyFlow(
    agents=[convert_agent, editor_agent],
    tasks=[convert_task, research_task, improve_task],
    process=Process.sequential,
    verbose=True
)


def _choices_from_options(options) -> dict:
    """Build a Qualtrics ``Choices`` mapping from a list of options.

    Each option may be a dict with a ``"text"`` key (the structured form used
    by the survey models), a string of the form ``"<index>=<label>"`` (the
    index becomes the choice key), or a plain label string. Options without an
    explicit index are keyed by their 1-based position among the choices
    collected so far.
    """
    choices = {}
    for opt in options:
        if isinstance(opt, dict):
            choices[str(len(choices) + 1)] = {"Display": opt["text"]}
        elif "=" in opt:
            index, text = opt.split("=", 1)
            choices[index.strip()] = {"Display": text.strip()}
        else:
            choices[str(len(choices) + 1)] = {"Display": opt.strip()}
    return choices


def survey_dict_to_qualtrics_payload(survey_dict: dict) -> dict:
    """
    Convert the crew's survey JSON into a Qualtrics v3 ``survey-definitions`` payload.

    Args:
        survey_dict: Dict with a ``"revised_survey"`` entry holding
          - theme (used as SurveyName; defaults to "New Survey")
          - questions: list of { question_id, question_text, input_type, input_config }

    Returns:
        dict with SurveyName, Language, ProjectCategory and a ``Questions``
        mapping keyed by each question's ``question_id``.

    Raises:
        KeyError: If ``revised_survey``, ``questions`` or a required question /
            config key is missing.
        ValueError: If a question has an ``input_type`` that is not handled here.
    """

    survey_meta = survey_dict["revised_survey"]
    payload = {
        "SurveyName": survey_meta.get("theme", "New Survey"),
        "Language": "EN",
        "ProjectCategory": "CORE",
        "Questions": {}
    }
    for q in survey_meta["questions"]:
        qid = q["question_id"]
        qt = q["question_text"]
        it = q["input_type"]
        cfg = q["input_config"]

        # Fields shared by every question type; branches below add to them.
        qobj = {
            "QuestionText": qt,
            "Configuration": {"QuestionDescriptionOption": "UseText"},
            "Validation": {"Settings": {"ForceResponse": "OFF", "Type": "None"}}
        }

        if it in ("multiple_choice", "single_choice"):
            qobj.update({
                "QuestionType": "MC",
                # "SAVR" (single answer, vertical) is used for both types; the
                # previous "SINGLE" value for single_choice is not an MC selector.
                # NOTE(review): if "multiple_choice" is meant to allow several
                # answers, the multi-answer selector ("MAVR") would be needed — confirm.
                "Selector": "SAVR",
                "SubSelector": "TX",
                "Choices": _choices_from_options(cfg["options"])
            })

        elif it == "slider":
            qobj.update({
                "QuestionType": "SL",
                "Selector": "Slider",
                "SubSelector": "SL"
            })

        elif it == "text_input":
            qobj.update({
                "QuestionType": "TE",
                "Selector": "ML" if cfg.get("multiline", False) else "TX"
            })

        elif it == "matrix":
            # Matrix table question (Bipolar)
            # NOTE(review): rows are sent as "Answers" and columns as "Choices";
            # Qualtrics' Matrix schema appears to expect the opposite (statements
            # in Choices, scale points in Answers) — confirm before relying on it.
            choices_dict = {}
            answs_dict = {}
            for idx, row in enumerate(cfg["rows"], 1):
                answs_dict[str(idx)] = {"Display": row["text"]}
            for idx, col in enumerate(cfg["columns"], 1):
                choices_dict[str(idx)] = {"Display": col["text"]}

            qobj.update({
                "QuestionType": "Matrix",
                "Selector": "Bipolar",
                "SubSelector": "SingleAnswer",
                "Choices": choices_dict,
                "Answers": answs_dict
            })

        elif it == "text_entry":
            qobj.update({
                "QuestionType": "TE",
                "Selector": cfg.get("selector", "TX"),
                "TextBoxFormat": cfg.get("format", None)
            })

        elif it == "descriptive_text":
            qobj.update({
                "QuestionType": "DB",
                "Selector": "TX"
            })

        elif it == "rank_order":
            qobj.update({
                "QuestionType": "RO",
                "Selector": "OD",
                "Choices": _choices_from_options(cfg["options"])
            })

        elif it == "side_by_side":
            qobj.update({
                "QuestionType": "SBS",
                "Selector": "MultipleTextBox",
                "Choices": _choices_from_options(cfg["options"])
            })

        elif it == "constant_sum":
            qobj.update({
                "QuestionType": "CS",
                "Selector": "CSS",
                "Choices": _choices_from_options(cfg["options"]),
                # Extend the shared Configuration rather than replacing it, so
                # QuestionDescriptionOption is kept (as net_promoter does).
                "Configuration": {
                    **qobj["Configuration"],
                    "TotalSum": cfg.get("total_sum", 100)
                }
            })

        elif it == "heat_map":
            qobj.update({
                "QuestionType": "HM",
                "Selector": "SelectedTable",
                "HeatMapImage": cfg.get("image_url", ""),
                "HeatMapHeight": cfg.get("height", 600),
                "HeatMapWidth": cfg.get("width", 800)
            })

        elif it == "graphic_slider":
            qobj.update({
                "QuestionType": "GR",
                "Selector": "One",
                "SubSelector": "Slider",
                "GraphicLeft": cfg.get("left_text", "Unsatisfied"),
                "GraphicRight": cfg.get("right_text", "Satisfied"),
                "GraphicImageUp": cfg.get("image_up_url", ""),
                "GraphicImageDown": cfg.get("image_down_url", "")
            })

        elif it == "net_promoter":
            qobj.update({
                "QuestionType": "NPS",
                "Selector": "StandardNPS",
                "Configuration": {
                    "QuestionDescriptionOption": "UseText",
                    "Format": {
                        "TextBefore": cfg.get("text_before", "How likely are you to recommend us?"),
                        "TextAfter": cfg.get("text_after", "")
                    }
                }
            })

        elif it == "timing":
            qobj.update({
                "QuestionType": "Timing",
                "Selector": "PageTimer",
                "Hidden": True
            })

        else:
            raise ValueError(f"Unsupported input_type: {it}")

        payload["Questions"][qid] = qobj

    return payload

class QualtricsClient:
    """Handles all Qualtrics API interactions.

    Credentials are read from the environment (after loading a ``.env`` file):
    ``QUALTRICS_API_TOKEN`` and ``QUALTRICS_DATA_CENTER`` are required,
    ``QUALTRICS_DIRECTORY_ID`` is stored if present.
    """

    # Export formats that get_survey_responses is able to parse.
    _PARSEABLE_FORMATS = ("csv", "json")

    def __init__(self):
        """Initialize Qualtrics API client with credentials from .env file.

        Raises:
            ValueError: If the API token or the data center is not configured.
        """
        # Print current working directory to help debug file path issues
        print(f"Current working directory: {os.getcwd()}")

        # Check if .env file exists
        if os.path.exists('.env'):
            print("Found .env file in current directory")
        else:
            print("WARNING: No .env file found in current directory!")

        # Load environment variables
        load_dotenv(verbose=True)

        self.api_token = os.getenv('QUALTRICS_API_TOKEN')
        self.data_center = os.getenv('QUALTRICS_DATA_CENTER')
        self.directory_id = os.getenv('QUALTRICS_DIRECTORY_ID')

        # Print obfuscated token for debugging (only first/last 4 chars)
        if self.api_token:
            token_length = len(self.api_token)
            masked_token = self.api_token[:4] + '*' * (token_length - 8) + self.api_token[-4:] if token_length > 8 else "****"
            print(f"API Token loaded (masked): {masked_token}")
        else:
            print("WARNING: No API token found in environment variables!")

        if self.data_center:
            print(f"Data center: {self.data_center}")
        else:
            print("WARNING: No data center found in environment variables!")

        if not self.api_token or not self.data_center:
            raise ValueError("Missing Qualtrics API credentials in .env file")

        # Set up base URL for API requests
        self.base_url = f"https://{self.data_center}.qualtrics.com/API/v3/"
        self.headers = {
            "X-API-Token": self.api_token,
            "Content-Type": "application/json"
        }

        # Test connection. This is best-effort: a failed check is only
        # reported, it does not prevent the client from being constructed.
        print("Testing Qualtrics API connection...")
        try:
            test_url = f"{self.base_url}whoami"
            response = requests.get(test_url, headers=self.headers)
            if response.status_code == 200:
                user_info = response.json()["result"]
                print(f"Connection successful! Authenticated as: {user_info.get('firstName', '')} {user_info.get('lastName', '')}")
            else:
                print(f"Connection test failed with status code: {response.status_code}")
                print(f"Response: {response.text}")
        except Exception as e:
            print(f"Error testing connection: {str(e)}")

    def create_survey(self, survey_name, survey_template=None):
        """
        Create a new survey in Qualtrics

        Args:
            survey_name (str): Name of the survey. Only used for the built-in
                default template; a supplied template keeps its own SurveyName.
            survey_template (dict, optional): Survey template JSON. It is not
                modified; "ProjectCategory" is added to a copy when missing.

        Returns:
            str: Survey ID of the created survey

        Raises:
            Exception: If the API does not answer with status 200.
        """
        print(f"Creating survey: {survey_name}")

        # If no template is provided, use a basic template
        if not survey_template:
            # Define the survey payload with required fields including ProjectCategory
            survey_payload = {
                "SurveyName": survey_name,
                "Language": "EN",
                "ProjectCategory": "CORE", # This is the required field that was missing
                "Questions": {
                    "QID1": {
                        "QuestionText": "What is your age?",
                        "QuestionType": "MC",
                        "Selector": "SAVR", # Required selector for multiple choice questions
                        "SubSelector": "TX", # Text selector
                        "Configuration": {
                            "QuestionDescriptionOption": "UseText"
                        },
                        "Validation": {
                            "Settings": {
                                "ForceResponse": "OFF",
                                "Type": "None"
                            }
                        },
                        "Choices": {
                            "1": {"Display": "18-24"},
                            "2": {"Display": "25-34"},
                            "3": {"Display": "35-44"},
                            "4": {"Display": "45-54"},
                            "5": {"Display": "55-64"},
                            "6": {"Display": "65+"}
                        }
                    },
                    "QID2": {
                        "QuestionText": "How satisfied are you with our product?",
                        "QuestionType": "Likert",
                        "Selector": "LSL", # Likert scale
                        "SubSelector": "TX", # Text selector
                        "Configuration": {
                            "QuestionDescriptionOption": "UseText"
                        },
                        "Validation": {
                            "Settings": {
                                "ForceResponse": "OFF",
                                "Type": "None"
                            }
                        },
                        "Choices": {
                            "1": {"Display": "Very dissatisfied"},
                            "2": {"Display": "Dissatisfied"},
                            "3": {"Display": "Neutral"},
                            "4": {"Display": "Satisfied"},
                            "5": {"Display": "Very satisfied"}
                        }
                    },
                    "QID3": {
                        "QuestionText": "Any additional comments?",
                        "QuestionType": "TE", # Text entry
                        "Selector": "ML", # Multi-line
                        "Configuration": {
                            "QuestionDescriptionOption": "UseText"
                        },
                        "Validation": {
                            "Settings": {
                                "ForceResponse": "OFF",
                                "Type": "None"
                            }
                        }
                    }
                }
            }
        else:
            # Work on a shallow copy so the caller's template dict is left
            # untouched, then make sure it includes ProjectCategory.
            survey_payload = dict(survey_template)
            survey_payload.setdefault("ProjectCategory", "CORE")

        # Create survey
        url = f"{self.base_url}survey-definitions"
        payload = json.dumps(survey_payload)

        print(f"Sending payload to Qualtrics: {payload[:200]}...")

        response = requests.post(url, headers=self.headers, data=payload)

        if response.status_code != 200:
            print(f"Error response: {response.text}")
            raise Exception(f"Failed to create survey: {response.text}")

        result = response.json()
        survey_id = result["result"]["SurveyID"]
        print(f"Survey created successfully with ID: {survey_id}")

        return survey_id

    def add_questions(self, survey_id: str, questions: List[dict]):
        """POST each question to the survey definition and print the API's answer.

        Every question dict must provide ``question_id``, ``question_text`` and
        ``QuestionType``; ``Selector``, ``SubSelector`` and ``Choices`` are
        forwarded only when present. Responses are printed, not checked.
        """
        for q in questions:
            # start with the fields every question needs
            q_payload = {
                "QuestionID":   q["question_id"],
                "QuestionText": q["question_text"],
                "QuestionType": q["QuestionType"],
                "Configuration": {"QuestionDescriptionOption": "UseText"},
                "Validation":    {"Settings": {"ForceResponse": "OFF", "Type": "None"}},
            }
            # only add Selector/SubSelector if given
            if "Selector" in q:
                q_payload["Selector"] = q["Selector"]
            if "SubSelector" in q:
                q_payload["SubSelector"] = q["SubSelector"]
            # only add Choices if given
            if "Choices" in q:
                q_payload["Choices"] = q["Choices"]

            url = f"{self.base_url}survey-definitions/{survey_id}/questions"
            resp = requests.post(url, headers=self.headers, json=q_payload)
            print(f"POST questions → {resp.status_code}", resp.json())

    def add_block(self, survey_id: str, block_payload: dict):
        """POST `block_payload` to the survey's blocks endpoint and print the answer."""
        url = f"{self.base_url}survey-definitions/{survey_id}/blocks"
        resp = requests.post(url, headers=self.headers, json=block_payload)
        print(f"POST blocks → {resp.status_code}", resp.json())

    def update_flow(self, survey_id: str, flow_payload: dict):
        """PUT `flow_payload` to the survey's flow endpoint and print the answer."""
        url = f"{self.base_url}survey-definitions/{survey_id}/flow"
        resp = requests.put(url, headers=self.headers, json=flow_payload)
        print("PUT flow →", resp.status_code, resp.json())

    def activate_survey(self, survey_id):
        """
        Activate a survey to make it available for distribution

        Args:
            survey_id (str): ID of the survey to activate

        Returns:
            bool: True if successful

        Raises:
            Exception: If the API does not answer with status 200.
        """
        print(f"Activating survey: {survey_id}")

        url = f"{self.base_url}surveys/{survey_id}"
        payload = json.dumps({"isActive": True})

        response = requests.put(url, headers=self.headers, data=payload)

        if response.status_code != 200:
            raise Exception(f"Failed to activate survey: {response.text}")

        print("Survey activated successfully")
        return True

    def create_distribution_link(self, survey_id, link_type="Anonymous"):
        """
        Create a distribution link for a survey

        Args:
            survey_id (str): ID of the survey to distribute
            link_type (str): Type of link; only "Anonymous" is implemented

        Returns:
            str: Distribution link URL

        Raises:
            NotImplementedError: For any `link_type` other than "Anonymous".
        """
        print(f"Creating distribution link for survey: {survey_id}")

        # For anonymous links, we can construct the URL directly based on the standard pattern
        # https://DATACENTERID.qualtrics.com/jfe/form/SURVEYID
        if link_type == "Anonymous":
            survey_link = f"https://{self.data_center}.qualtrics.com/jfe/form/{survey_id}"
            print(f"Anonymous survey link created: {survey_link}")
            return survey_link

        # For other distribution types, we would use the API, but that's not implemented yet
        else:
            raise NotImplementedError(f"Distribution type '{link_type}' is not yet supported")

    def get_survey_responses(self, survey_id, file_format="csv"):
        """
        Download survey responses

        Args:
            survey_id (str): ID of the survey
            file_format (str): Format of the response file; "csv" and "json"
                are the formats this method can parse

        Returns:
            pandas.DataFrame: Survey responses as a DataFrame

        Raises:
            ValueError: If `file_format` cannot be parsed (checked before any
                request is made).
            Exception: If an API call does not answer with status 200, the
                export reports status "failed", or the downloaded archive
                holds no file of the requested format.
        """
        # Reject unparseable formats up front rather than after the export
        # has been generated and downloaded.
        if file_format not in self._PARSEABLE_FORMATS:
            raise ValueError(f"Unsupported file format: {file_format}")

        print(f"Downloading responses for survey: {survey_id}")

        # Step 1: Create the export
        export_url = f"{self.base_url}surveys/{survey_id}/export-responses"
        export_payload = json.dumps({
            "format": file_format,
            "useLabels": True
        })

        export_response = requests.post(export_url, headers=self.headers, data=export_payload)

        if export_response.status_code != 200:
            raise Exception(f"Failed to initiate export: {export_response.text}")

        progress_id = export_response.json()["result"]["progressId"]

        # Step 2: Poll until the export reports completion. The status field is
        # the only exit criterion: "fileId" is read from the final result, and a
        # "failed" status would otherwise keep this loop spinning forever.
        progress_url = f"{self.base_url}surveys/{survey_id}/export-responses/{progress_id}"
        while True:
            progress_response = requests.get(progress_url, headers=self.headers)

            if progress_response.status_code != 200:
                raise Exception(f"Failed to check export progress: {progress_response.text}")

            progress_result = progress_response.json()["result"]
            progress_status = progress_result["status"]
            progress = progress_result.get("percentComplete", 0)

            print(f"Export progress: {progress}%")

            if progress_status == "complete":
                break
            if progress_status == "failed":
                raise Exception(f"Response export failed: {progress_response.text}")
            time.sleep(2)

        # Step 3: Download the file
        file_id = progress_result["fileId"]
        download_url = f"{self.base_url}surveys/{survey_id}/export-responses/{file_id}/file"
        download_response = requests.get(download_url, headers=self.headers)

        if download_response.status_code != 200:
            raise Exception(f"Failed to download responses: {download_response.text}")

        # Step 4: Extract and parse the zip file
        with zipfile.ZipFile(io.BytesIO(download_response.content)) as zip_file:
            members = zip_file.namelist()
            matching = [f for f in members if f.endswith(f".{file_format}")]
            if not matching:
                raise Exception(f"Export archive contains no .{file_format} file: {members}")
            with zip_file.open(matching[0]) as file:
                if file_format == "csv":
                    df = pd.read_csv(file)
                else:  # "json" — the only other format accepted above
                    df = pd.read_json(file)

        print(f"Successfully downloaded {len(df)} responses")
        return df

class MTurkClient:
    """Handles all MTurk API interactions.

    Wraps a boto3 ``mturk`` client and exposes the operations used by this
    notebook: reading the account balance, publishing a survey HIT, listing
    and approving assignments, and deleting a HIT.
    """

    # Both endpoints below live in us-east-1, so requests have to be signed for
    # that region no matter what AWS_REGION is set to in the environment.
    _REGION = 'us-east-1'
    _SANDBOX_ENDPOINT = 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'
    _PRODUCTION_ENDPOINT = 'https://mturk-requester.us-east-1.amazonaws.com'

    def __init__(
        self,
        aws_access_key_id: str = None,
        aws_secret_access_key: str = None,
        use_sandbox: bool = True
        ):
        """
        Build the underlying boto3 MTurk client.

        Args:
            aws_access_key_id (str, optional): AWS access key. Used only when
                the secret key is supplied as well; otherwise both values are
                read from AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY.
            aws_secret_access_key (str, optional): AWS secret key.
            use_sandbox (bool): Talk to the requester sandbox when True,
                to the production endpoint when False.

        Raises:
            ValueError: If no complete credential pair could be found.
        """
        if aws_access_key_id and aws_secret_access_key:
            self.aws_access_key_id     = aws_access_key_id
            self.aws_secret_access_key = aws_secret_access_key
        else:
            self.aws_access_key_id     = os.getenv('AWS_ACCESS_KEY_ID')
            self.aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
        self.use_sandbox = use_sandbox

        if not self.aws_access_key_id or not self.aws_secret_access_key:
            raise ValueError("Missing AWS credentials")

        endpoint = self._SANDBOX_ENDPOINT if self.use_sandbox else self._PRODUCTION_ENDPOINT

        # The region is pinned rather than taken from AWS_REGION: signing for a
        # region that differs from the endpoint's makes every call fail.
        self.client = boto3.client(
            'mturk',
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            region_name=self._REGION,
            endpoint_url=endpoint
        )
        print(f"MTurk client initialized in {'Sandbox' if self.use_sandbox else 'Production'} mode")

    @staticmethod
    def _default_hit_config():
        """Return a fresh copy of the default HIT settings."""
        return {
            'Title': 'Complete a short survey',
            'Description': 'We need your input for a quick survey that should take less than 10 minutes',
            'Keywords': 'survey, research, opinion, feedback',
            'Reward': '0.50',
            'MaxAssignments': 10,
            'LifetimeInSeconds': 86400,  # 1 day
            'AssignmentDurationInSeconds': 1800,  # 30 minutes
            'AutoApprovalDelayInSeconds': 86400,  # 1 day
            'QualificationRequirements': []
        }

    def get_account_balance(self):
        """
        Get the available MTurk account balance.

        Returns:
            float: The 'AvailableBalance' value reported by the API.
        """
        response = self.client.get_account_balance()
        balance = response['AvailableBalance']
        print(f"MTurk account balance: ${balance}")
        return float(balance)

    def create_hit_with_survey_link(self, survey_link, hit_config=None):
        """
        Create an MTurk HIT with a link to a Qualtrics survey

        Args:
            survey_link (str): URL to the Qualtrics survey
            hit_config (dict, optional): Custom configuration for the HIT.
                Any key that is missing falls back to the default value, so a
                partial dict such as {'Reward': '1.00'} is accepted.

        Returns:
            str: HIT ID
        """
        import html  # local import so the method does not rely on a file-level one

        print("Creating MTurk HIT with survey link")

        # Caller-supplied values win; everything else comes from the defaults.
        settings = {**self._default_hit_config(), **(hit_config or {})}

        # The link ends up inside an HTML attribute and as link text, so quotes,
        # ampersands and angle brackets must be escaped. Escaping '>' also keeps
        # a stray ']]>' from terminating the CDATA section early.
        safe_link = html.escape(survey_link, quote=True)

        # Create the HTML question with the survey link
        question_html = f"""
        <HTMLQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2011-11-11/HTMLQuestion.xsd">
            <HTMLContent><![CDATA[
                <!DOCTYPE html>
                <html>
                <head>
                    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
                    <script type='text/javascript' src='https://s3.amazonaws.com/mturk-public/externalHIT_v1.js'></script>
                </head>
                <body>
                    <form name='mturk_form' method='post' id='mturk_form' action='https://www.mturk.com/mturk/externalSubmit'>
                        <input type='hidden' value='' name='assignmentId' id='assignmentId'/>
                        <h1>Survey Task</h1>
                        <p>Please complete the survey at the following link:</p>
                        <p><a href='{safe_link}' target='_blank'>{safe_link}</a></p>
                        <p>After completing the survey, you will receive a completion code. Enter the code below:</p>
                        <p><input type='text' name='completion_code' id='completion_code' size='40'/></p>
                        <p><input type='submit' id='submitButton' value='Submit' /></p>
                    </form>
                    <script language='Javascript'>
                        turkSetAssignmentID();
                    </script>
                </body>
                </html>
            ]]></HTMLContent>
            <FrameHeight>600</FrameHeight>
        </HTMLQuestion>
        """

        # Create the HIT
        response = self.client.create_hit(
            Title=settings['Title'],
            Description=settings['Description'],
            Keywords=settings['Keywords'],
            Reward=settings['Reward'],
            MaxAssignments=settings['MaxAssignments'],
            LifetimeInSeconds=settings['LifetimeInSeconds'],
            AssignmentDurationInSeconds=settings['AssignmentDurationInSeconds'],
            AutoApprovalDelayInSeconds=settings['AutoApprovalDelayInSeconds'],
            Question=question_html,
            QualificationRequirements=settings['QualificationRequirements']
        )

        hit_id = response['HIT']['HITId']
        hit_type_id = response['HIT']['HITTypeId']

        print(f"HIT created successfully with ID: {hit_id}")

        # Print the HIT URL (the worker site differs between sandbox and production)
        if self.use_sandbox:
            worker_url = f"https://workersandbox.mturk.com/mturk/preview?groupId={hit_type_id}"
        else:
            worker_url = f"https://worker.mturk.com/mturk/preview?groupId={hit_type_id}"

        print(f"Workers can access the HIT at: {worker_url}")

        return hit_id

    def get_hit_assignments(self, hit_id):
        """
        Get all assignments for a HIT

        Args:
            hit_id (str): ID of the HIT

        Returns:
            list: List of assignment dictionaries
        """
        print(f"Getting assignments for HIT: {hit_id}")

        all_assignments = []
        request = {'HITId': hit_id, 'MaxResults': 100}

        # Follow NextToken until the API stops returning one.
        while True:
            response = self.client.list_assignments_for_hit(**request)
            all_assignments.extend(response['Assignments'])

            next_token = response.get('NextToken')
            if not next_token:
                break
            request['NextToken'] = next_token

        print(f"Found {len(all_assignments)} assignments")
        return all_assignments

    def approve_assignments(self, assignments, feedback=None):
        """
        Approve multiple assignments

        Failures are reported per assignment and do not stop the loop.

        Args:
            assignments (list): List of assignment dictionaries or IDs
            feedback (str, optional): Feedback to workers

        Returns:
            int: Number of successfully approved assignments
        """
        approved_count = 0
        requester_feedback = feedback if feedback else "Thank you for your participation!"

        for assignment in assignments:
            # Extract assignment ID if a dictionary was provided
            assignment_id = assignment['AssignmentId'] if isinstance(assignment, dict) else assignment

            try:
                self.client.approve_assignment(
                    AssignmentId=assignment_id,
                    RequesterFeedback=requester_feedback
                )
                approved_count += 1
            except Exception as e:
                print(f"Error approving assignment {assignment_id}: {str(e)}")

        print(f"Successfully approved {approved_count} assignments")
        return approved_count

    def delete_hit(self, hit_id):
        """
        Delete a HIT

        A 'Reviewable' HIT is deleted directly. An 'Assignable' HIT is first
        expired and then deleted. Any other status is left alone.

        Args:
            hit_id (str): ID of the HIT to delete

        Returns:
            bool: True if the HIT was deleted, False if its status does not
                allow deletion or an API call raised.
        """
        try:
            # Get the HIT status
            hit = self.client.get_hit(HITId=hit_id)
            status = hit['HIT']['HITStatus']

            # If the HIT is reviewable, dispose of it
            if status == 'Reviewable':
                self.client.delete_hit(HITId=hit_id)
                print(f"HIT {hit_id} deleted successfully")
                return True

            # If the HIT is assignable, expire it first then delete it
            elif status == 'Assignable':
                self.client.update_expiration_for_hit(
                    HITId=hit_id,
                    ExpireAt=datetime(2015, 1, 1)  # Set to a past date to expire immediately
                )
                time.sleep(1)  # Give time for the HIT to update
                self.client.delete_hit(HITId=hit_id)
                print(f"HIT {hit_id} expired and deleted successfully")
                return True

            else:
                print(f"Cannot delete HIT {hit_id}, status is {status}")
                return False

        except Exception as e:
            print(f"Error deleting HIT {hit_id}: {str(e)}")
            return False


from dotenv import load_dotenv
from typing import Optional

class QualtricsAndMTurkAutomation:
    """Ties the Qualtrics client and the MTurk client together.

    `run` publishes a survey and a HIT that links to it;
    `collect_and_process_results` downloads the responses and (optionally)
    approves the HIT's assignments.
    """

    # Text of the closing descriptive block. The ${e://Field/ResponseID}
    # placeholder is sent verbatim -- presumably Qualtrics substitutes the
    # respondent's response ID when rendering; confirm against Qualtrics docs.
    _COMPLETION_TEXT = (
        "Thank you for completing the survey!\n"
        "Your completion code is: ${e://Field/ResponseID}"
    )

    def __init__(self, mturk_client: Optional[MTurkClient] = None):
        """
        Args:
            mturk_client (MTurkClient, optional): Client to reuse. When omitted
                a default `MTurkClient()` is created after `.env` is loaded.
        """
        load_dotenv()
        self.qualtrics = QualtricsClient()
        self.mturk     = mturk_client or MTurkClient()

    @staticmethod
    def _qualtrics_qid(key: str) -> str:
        """Turn a payload key such as "Q3" into "QID3".

        Keys that already start with "QID" are returned unchanged; stripping
        the leading "Q" from them would otherwise yield "QIDID3".
        """
        if key.startswith("QID"):
            return key
        return f"QID{key.lstrip('Q')}"

    def run(self, survey_payload: dict, hit_config: Optional[dict] = None) -> dict:
        """
        Create and activate a survey, then publish a HIT pointing at it.

        Args:
            survey_payload (dict): Must contain "SurveyName" and a "Questions"
                mapping whose values provide "QuestionText", "QuestionType",
                "Selector", "SubSelector" and "Choices".
            hit_config (dict, optional): Passed through to
                `MTurkClient.create_hit_with_survey_link`.

        Returns:
            dict: {"survey_id", "survey_link", "hit_id"}
        """
        survey_id = self.qualtrics.create_survey(
            survey_name=survey_payload["SurveyName"],
            survey_template=survey_payload
        )

        questions = []
        for qid, qobj in survey_payload["Questions"].items():
            real_qid = self._qualtrics_qid(qid)
            # Both the snake_case and the Qualtrics-style keys are supplied,
            # exactly as before; which ones add_questions reads is not visible here.
            questions.append({
                "question_id":   real_qid,
                "question_text": qobj["QuestionText"],
                "QuestionID":    real_qid,
                "QuestionText":  qobj["QuestionText"],
                "QuestionType":  qobj["QuestionType"],
                "Selector":      qobj["Selector"],
                "SubSelector":   qobj["SubSelector"],
                "Choices":       qobj["Choices"]
            })
        self.qualtrics.add_questions(survey_id, questions)

        # Append a descriptive block showing the completion code; it takes the
        # next ID after the regular questions.
        comp_qid = f"QID{len(questions)+1}"
        self.qualtrics.add_questions(survey_id, [{
            "question_id":   comp_qid,
            "question_text": self._COMPLETION_TEXT,
            "QuestionID":    comp_qid,
            "QuestionText":  self._COMPLETION_TEXT,
            "QuestionType":  "DB",   # Descriptive Text
            "Selector":      "TB"    # Text/Graphic Block
        }])

        self.qualtrics.activate_survey(survey_id)

        survey_link = self.qualtrics.create_distribution_link(survey_id)

        hit_id = self.mturk.create_hit_with_survey_link(survey_link, hit_config)

        return {
            "survey_id":   survey_id,
            "survey_link": survey_link,
            "hit_id":      hit_id
        }

    def collect_and_process_results(self, survey_id, hit_id, auto_approve=True):
        """
        Download survey responses, save them to CSV and fetch HIT assignments.

        The method is best-effort: if any step raises, the error is printed,
        stored under results['error'], and whatever was gathered so far is
        returned.

        Args:
            survey_id: ID of the survey to export.
            hit_id: ID of the HIT whose assignments are fetched.
            auto_approve (bool): Approve every returned assignment when True.

        Returns:
            dict: May contain 'responses', 'csv_filename', 'assignments',
                'approved_count' and, on failure, 'error'.
        """
        results = {}

        try:
            responses_df = self.qualtrics.get_survey_responses(survey_id)
            results['responses'] = responses_df

            # Timestamped file name so repeated runs do not overwrite each other.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            csv_filename = f"survey_responses_{timestamp}.csv"
            responses_df.to_csv(csv_filename, index=False)
            results['csv_filename'] = csv_filename

            print(f"Saved {len(responses_df)} responses to {csv_filename}")

            assignments = self.mturk.get_hit_assignments(hit_id)
            results['assignments'] = assignments

            if auto_approve and assignments:
                approved_count = self.mturk.approve_assignments(assignments)
                results['approved_count'] = approved_count

            return results

        except Exception as e:
            print(f"Error collecting results: {str(e)}")
            # Surface the failure to the caller instead of only printing it.
            results['error'] = str(e)
            return results



import asyncio

if __name__ == "__main__":
    # Example of the expected input:
    #
    # Topic: The Theory of Planned Behavior Survey
    # Questions:
    # 1. I intend to purchase organic food in the next month. (1=Strongly disagree; 7=Strongly agree)
    # 2. Buying organic food is beneficial to my health. (1=Strongly disagree; 7=Strongly agree)
    # 3. I feel confident in my ability to purchase organic food if I want to. (1=Strongly disagree; 7=Strongly agree)
    # 4. People who are important to me think I should purchase organic food. (1=Strongly disagree; 7=Strongly agree)
    # 5. The decision to buy organic food is entirely up to me. (1=Strongly disagree; 7=Strongly agree)
    print("========================================")
    print("⚠️  INPUT REQUIREMENTS:")
    print("- You must include a line starting with 'Topic:'")
    print("- You must include at least one line starting with 'Questions:'")
    print("Otherwise, the survey cannot be processed.")
    print("========================================")
    survey_to_process = input("Please enter the Survey content: ")

    # Run the flow exactly once. A second identical kickoff used to follow and
    # overwrite survey_dict, so `annotated`/`revised` came from a different run
    # than the payload built below (and the flow was executed twice).
    survey_dict = asyncio.run(flow.kickoff_async(inputs={
        'survey_text': survey_to_process
    }))

    annotated = survey_dict['original_with_comments']
    revised   = survey_dict['revised_survey']

    qualtrics_payload = survey_dict_to_qualtrics_payload(survey_dict)

    # NOTE(review): the title is fixed to the organic-food example even though
    # the survey text is entered by the user -- confirm this is intended.
    hit_config = {
        'Title': 'Complete a short survey on organic food',
        'Description': revised["purpose"],
        'Keywords': 'survey, research, feedback',
        'Reward': '0.75',
        'MaxAssignments': 20,
        'LifetimeInSeconds': 86400,
        'AssignmentDurationInSeconds': 1800,
        'AutoApprovalDelayInSeconds': 86400,
        'QualificationRequirements': []
    }

    automation = QualtricsAndMTurkAutomation()
    results = automation.run(qualtrics_payload, hit_config)



TypeError: Flow.__init__() got an unexpected keyword argument 'agents'