In [3]:
import json
from pathlib import Path
import pandas as pd

# Input CSV that is loaded below, and the JSON file the generated tasks are
# written to later in this script.
CSV_PATH = Path("Replies_Tasks.csv")
OUT_JSON = Path("tasks_data.json")

df = pd.read_csv(CSV_PATH)

# Fail fast if the CSV lacks any column the rest of the script reads.
required = {"Prompt_Id", "Reply_Id", "condition", "reply_text"}
missing = required.difference(df.columns)
if missing:
    found_columns = list(df.columns)
    raise ValueError(
        f"Missing required columns: {sorted(missing)}. Found: {found_columns}"
    )

def is_nonempty_text(x: str) -> bool:
    """Return True only when *x* is a string with non-whitespace content.

    Any non-string value (for example ``None`` or a float NaN) yields False,
    as does an empty or whitespace-only string.
    """
    if not isinstance(x, str):
        return False
    return bool(x.strip())

# Keep only rows whose reply_text is a non-blank string, then renumber the
# index from 0 so it is contiguous again after the filter.
has_reply_text = df["reply_text"].apply(is_nonempty_text)
df = df[has_reply_text].reset_index(drop=True)

# Every remaining row must carry a known condition label.
#
# Missing values are rejected explicitly rather than dropped before the
# check: a row with a missing condition would map to a NaN bot_mask below,
# which json.dumps writes as the bare token NaN (not valid JSON), and str()
# would turn the missing condition into the literal "nan" in the task meta.
valid_conditions = {"A", "B"}
n_missing_condition = int(df["condition"].isna().sum())
if n_missing_condition:
    raise ValueError(f"Found {n_missing_condition} row(s) with a missing "
                     f"condition value. Expected only {valid_conditions}.")

bad = set(df["condition"].astype(str)) - valid_conditions
if bad:
    raise ValueError(f"Unexpected condition values: {bad}. "
                     f"Expected only {valid_conditions}.")

# Label stored in each task's data in place of the raw condition letter.
df["bot_mask"] = df["condition"].map({"A": "Chatbot A", "B": "Chatbot B"})

# Build one task dict per row. The task id is the row's index label plus one;
# "data" carries the reply text and the masked bot label, while "meta" keeps
# the identifying columns as strings.
tasks = [
    {
        "id": int(position + 1),
        "data": {
            "reply_text": record["reply_text"],
            "bot_mask": record["bot_mask"],
        },
        "meta": {
            column: str(record[column])
            for column in ("Prompt_Id", "Reply_Id", "condition")
        },
    }
    for position, record in df.iterrows()
]

# ensure_ascii=False keeps non-ASCII characters readable in the output file.
serialized_tasks = json.dumps(tasks, ensure_ascii=False, indent=2)
OUT_JSON.write_text(serialized_tasks, encoding="utf-8")

# Report how many tasks were written and how they split across the two bot
# labels, adding a note when the split is uneven.
n = len(tasks)
nA, nB = (
    (df["bot_mask"] == label).sum() for label in ("Chatbot A", "Chatbot B")
)
summary_lines = [
    f"Wrote {n} tasks → {OUT_JSON}",
    f"Distribution: Chatbot A = {nA}, Chatbot B = {nB}",
]
if nA != nB:
    summary_lines.append(
        "Note: A/B counts differ. If this isn’t intended, check your CSV."
    )
print("\n".join(summary_lines))

Wrote 120 tasks → tasks_data.json
Distribution: Chatbot A = 60, Chatbot B = 60
