In [1]:
import hashlib
import json
import os
import uuid
from collections import deque

import pandas as pd

In [2]:
def generate_uuid(value):
    """Return a deterministic UUID-formatted string derived from ``value``.

    The string is encoded with ``str.encode()`` (UTF-8 by default), hashed with
    MD5, and the 32-character hex digest is rendered in canonical
    8-4-4-4-12 UUID form. The same input always yields the same id.

    MD5 is used here only to derive a stable identifier, not for security.
    """
    digest_hex = hashlib.md5(value.encode()).hexdigest()
    derived = uuid.UUID(hex=digest_hex)
    return str(derived)

def allocate_questions(questions, doctors):
    """Distribute ``questions`` across ``doctors`` in round-robin order.

    Args:
        questions: Iterable of questions to hand out.
        doctors: Indexable, sized collection of hashable doctor keys.

    Returns:
        dict mapping each doctor to the list of questions assigned to it.
        Question ``k`` (0-based) goes to ``doctors[k % len(doctors)]``; every
        doctor gets an entry even if it receives no questions.

    Raises:
        ZeroDivisionError: if ``doctors`` is empty while ``questions`` is not.
    """
    buckets = dict((name, []) for name in doctors)

    position = 0
    for item in questions:
        # Wrap around the doctor list once every doctor has received a question.
        owner = doctors[position % len(doctors)]
        buckets[owner].append(item)
        position += 1

    return buckets

def rotate_list_in_place(lst):
    """Rotate ``lst`` one position to the right, mutating it in place.

    The last element becomes the first and every other element shifts up by
    one. The same list object is updated (via slice assignment), so other
    references to it observe the change. Empty and single-element lists are
    left unchanged. Returns ``None``.
    """
    # Last element (if any) followed by everything before it.
    lst[:] = lst[-1:] + lst[:-1]


In [3]:
import pandas as pd

# Pilot question/answer pairs, one record per row.
# NOTE: a later cell derives each question's id from the exact question text,
# so editing a question string changes its id.
questions_df = pd.DataFrame([
    {
        'question': "Can I eat breakfast before the surgery?",
        'answer': "Before cataract surgery, you’ll usually be asked not to eat or drink anything for at least 6 hours beforehand. This is to reduce the risk of complications from anesthesia. If your surgery is in the morning, you’ll likely need to skip breakfast. However, some clinics allow small sips of water or clear fluids up to 2 hours before. Always follow the exact instructions your doctor or surgical team gives you—they’ll tell you what’s safe for your specific case. If you’re unsure, call the clinic to confirm.",
    },
    {
        'question': "What are the dos and donts after surgery?",
        'answer': "After cataract surgery, do rest your eyes, use prescribed eye drops, and wear the protective shield as advised. Keep the eye clean and avoid rubbing it. Don’t lift heavy objects, bend over deeply, swim, or expose your eye to dust or dirty water for at least a week. Avoid eye makeup and strenuous exercise until your doctor says it’s safe. Wear sunglasses outdoors to protect from bright light. Attend all follow-up appointments to monitor healing. If you notice pain, worsening vision, or redness, contact your doctor promptly. Always follow your surgeon’s specific instructions for the best recovery.",
    },
    {
        'question': "How safe is the surgery?",
        'answer': "Cataract surgery is very safe and one of the most common surgeries worldwide. Over 95% of cases improve vision without serious complications. Modern techniques use small incisions and quick recovery times. Risks like infection, swelling, or retinal detachment are rare but possible. Most side effects, such as mild discomfort or blurred vision, are temporary and resolve within days or weeks. Your surgeon will check your eye health beforehand to minimize risks. Following aftercare instructions helps ensure a smooth recovery. If you have other eye conditions, discuss them with your doctor to understand your personal risk better.",
    },
])


In [4]:
# Location of the doctor CSV. The default is the original author's local checkout;
# set the DOCTOR_CSV_PATH environment variable to run this notebook on another machine.
# The file must provide an `email` column, which a later cell hashes into doctor ids.
DOCTOR_CSV_PATH = os.environ.get(
    "DOCTOR_CSV_PATH",
    "/home/rash598/doctor_sys_3/cataract-doc-study/cataract_doc_study/data/doctor.csv",
)
docs_df = pd.read_csv(DOCTOR_CSV_PATH)

In [5]:
# Derive a stable `id` for every doctor (from the email) and every question (from the
# question text) so the same input always maps to the same identifier.
docs_df['id'] = docs_df['email'].map(generate_uuid)
questions_df['id'] = questions_df['question'].map(generate_uuid)

In [6]:
# Column-name constants shared by the cells that build the assignment table.
doctor_id, condition_id, question_id, duration = (
    "doctor_id",
    "condition_id",
    "question_id",
    "duration",
)

# Full column order of the assignment table; start from an empty frame with that schema.
columns = [doctor_id, condition_id, question_id, duration] + ["answer", "conversation"]
doctors_question_set = pd.DataFrame(columns=columns)

In [7]:
doctor_id_list = docs_df['id'].tolist()
questions_id_list = questions_df['id'].tolist()

# Each doctor is paired with every question once per condition; the loop index is
# stored as the row's condition id.
N_CONDITIONS = 3

# Collect every row first and build the frame in a single call. Concatenating one row
# at a time copies the whole frame on each iteration (quadratic), and appending to the
# existing frame meant re-running this cell duplicated every row. Rebuilding from
# scratch makes the cell idempotent. Row order is unchanged: condition, then doctor,
# then question.
assignment_rows = [
    {
        doctor_id: doc_id,
        condition_id: condition,
        question_id: q_id,
        duration: 0,
        "answer": "",
        "conversation": "",
    }
    for condition in range(N_CONDITIONS)
    for doc_id in doctor_id_list
    for q_id in questions_id_list
]
doctors_question_set = pd.DataFrame(assignment_rows, columns=columns)

In [8]:
# Preview only the first rows instead of rendering the whole assignment table.
doctors_question_set.head(10)

Unnamed: 0,doctor_id,condition_id,question_id,duration,answer,conversation
0,90a06836-7e7c-95a4-bd7b-274f44cbc0da,0,e6cf982f-c69b-d70c-c4f3-0c1f26b12fd9,0,,
1,90a06836-7e7c-95a4-bd7b-274f44cbc0da,0,cc7e0bd4-ac0f-6fbd-12ea-a26e566b8bd7,0,,
2,90a06836-7e7c-95a4-bd7b-274f44cbc0da,0,1cc63bda-98aa-96cb-23f6-a91fe19b6bc6,0,,
3,619744a7-7d59-4835-a856-e882a9d357c1,0,e6cf982f-c69b-d70c-c4f3-0c1f26b12fd9,0,,
4,619744a7-7d59-4835-a856-e882a9d357c1,0,cc7e0bd4-ac0f-6fbd-12ea-a26e566b8bd7,0,,
5,619744a7-7d59-4835-a856-e882a9d357c1,0,1cc63bda-98aa-96cb-23f6-a91fe19b6bc6,0,,
6,bf58d3ae-3a4f-63fd-c971-9204846e3ffd,0,e6cf982f-c69b-d70c-c4f3-0c1f26b12fd9,0,,
7,bf58d3ae-3a4f-63fd-c971-9204846e3ffd,0,cc7e0bd4-ac0f-6fbd-12ea-a26e566b8bd7,0,,
8,bf58d3ae-3a4f-63fd-c971-9204846e3ffd,0,1cc63bda-98aa-96cb-23f6-a91fe19b6bc6,0,,
9,695edb5f-da46-61f3-da2a-7d7c0ea43325,0,e6cf982f-c69b-d70c-c4f3-0c1f26b12fd9,0,,


In [9]:
# In-memory list of doctor records (one dict per row).
docs_json = docs_df.to_dict(orient="records")

# Write both tables to disk as pretty-printed JSON arrays of records.
# NOTE(review): the doctor export includes the `email` column — confirm this file is
# not shared or committed where it should not be.
_json_export_opts = {"orient": "records", "indent": 4}  # indent=4 for readable formatting
docs_df.to_json("pilot_docs.json", **_json_export_opts)
questions_df.to_json("pilot_questions.json", **_json_export_opts)

In [10]:
# Build one entry per doctor: the doctor's id, the list of
# {question_id, condition_id} records assigned to them, and a progress counter
# initialised to 0. groupby iterates the doctor ids in sorted order.
doctors_question_set_json = [
    {
        "user_id": user_id,
        "questions_list": group[["question_id", "condition_id"]].to_dict(orient="records"),
        "progress_id": 0,
    }
    for user_id, group in doctors_question_set.groupby("doctor_id")
]
doctors_question_set_json

[{'user_id': '619744a7-7d59-4835-a856-e882a9d357c1',
  'questions_list': [{'question_id': 'e6cf982f-c69b-d70c-c4f3-0c1f26b12fd9',
    'condition_id': 0},
   {'question_id': 'cc7e0bd4-ac0f-6fbd-12ea-a26e566b8bd7', 'condition_id': 0},
   {'question_id': '1cc63bda-98aa-96cb-23f6-a91fe19b6bc6', 'condition_id': 0},
   {'question_id': 'e6cf982f-c69b-d70c-c4f3-0c1f26b12fd9', 'condition_id': 1},
   {'question_id': 'cc7e0bd4-ac0f-6fbd-12ea-a26e566b8bd7', 'condition_id': 1},
   {'question_id': '1cc63bda-98aa-96cb-23f6-a91fe19b6bc6', 'condition_id': 1},
   {'question_id': 'e6cf982f-c69b-d70c-c4f3-0c1f26b12fd9', 'condition_id': 2},
   {'question_id': 'cc7e0bd4-ac0f-6fbd-12ea-a26e566b8bd7', 'condition_id': 2},
   {'question_id': '1cc63bda-98aa-96cb-23f6-a91fe19b6bc6', 'condition_id': 2}],
  'progress_id': 0},
 {'user_id': '695edb5f-da46-61f3-da2a-7d7c0ea43325',
  'questions_list': [{'question_id': 'e6cf982f-c69b-d70c-c4f3-0c1f26b12fd9',
    'condition_id': 0},
   {'question_id': 'cc7e0bd4-ac0f-6fbd

In [11]:
# Persist the per-doctor question sets as pretty-printed JSON.
with open("pilot_doctor_question_set.json", "w") as out_file:
    out_file.write(json.dumps(doctors_question_set_json, indent=4))

In [12]:
from cataract_doc_study.dependency_setup import user_client, survey_client

documents = []
for doctor_question_set in doctors_question_set_json:
    question_set = {
        "_id": doctor_question_set["user_id"],
        "user_id": doctor_question_set["user_id"],
        "questions_list": doctor_question_set["questions_list"],
        "progress_id": doctor_question_set["progress_id"],
    }
    documents.append(question_set)
await user_client.ainsert(documents)

  delegate = self.__delegate_class__(*args, **kwargs)


(['619744a7-7d59-4835-a856-e882a9d357c1',
  '695edb5f-da46-61f3-da2a-7d7c0ea43325',
  '90a06836-7e7c-95a4-bd7b-274f44cbc0da',
  'bf58d3ae-3a4f-63fd-c971-9204846e3ffd',
  'd0d22c8b-48bb-5377-cc87-95e9f500d9a8'],
 None)