In [1]:
import pandas as pd
import hashlib
import json
import uuid
from collections import deque

In [2]:
def generate_uuid(value):
    """Return a deterministic UUID-formatted string derived from ``value``.

    The string is UTF-8 encoded and hashed with MD5; the 32 hex digits of
    the digest are then laid out in the canonical 8-4-4-4-12 UUID form.
    The same input always yields the same id. MD5 is used here purely as a
    stable fingerprint, not for any security purpose.

    Args:
        value: Text to fingerprint. Must provide ``.encode()`` (i.e. a str).

    Returns:
        The hyphenated, lowercase UUID string.
    """
    digest_hex = hashlib.md5(value.encode()).hexdigest()
    derived_id = uuid.UUID(hex=digest_hex)
    return str(derived_id)

def allocate_questions(questions, doctors):
    """Deal ``questions`` out to ``doctors`` one at a time, round-robin.

    Question ``k`` goes to ``doctors[k % len(doctors)]``, so the order of
    ``doctors`` decides who receives which questions.

    Args:
        questions: Sequence of question identifiers, in dealing order.
        doctors: Indexable sequence of hashable doctor identifiers.

    Returns:
        A dict keyed by doctor (in the order given) whose values are the
        lists of questions assigned to that doctor. Doctors that receive
        nothing still appear with an empty list.

    Raises:
        ZeroDivisionError: if ``doctors`` is empty while ``questions`` is not.
    """
    allocation = {}
    for doctor in doctors:
        allocation[doctor] = []

    doctor_count = len(doctors)
    for position, question in enumerate(questions):
        # Wrap around to the first doctor once everyone has been served.
        recipient = doctors[position % doctor_count]
        allocation[recipient].append(question)

    return allocation

def rotate_list_in_place(lst):
    """Rotate ``lst`` one step to the right, modifying it in place.

    The last element moves to the front and every other element shifts one
    position towards the end. The list object itself is kept (only its
    contents are replaced), so other references to it see the rotation.
    Empty and single-element lists are left unchanged.

    Args:
        lst: The list to rotate.

    Returns:
        None.
    """
    # The right-hand side is fully built before the slice assignment runs,
    # so reading from and writing to the same list is safe here.
    lst[:] = lst[-1:] + lst[:-1]


In [3]:
# Number of study conditions. The allocation cell iterates range(conditions),
# so condition ids run from 0 to conditions - 1.
conditions = 3
# Input tables, resolved relative to the notebook's working directory.
docs_df = pd.read_csv("./cataract_doc_study/data/doctor.csv")  # the 'email' column is read in the next cell
questions_df = pd.read_csv("./cataract_doc_study/data/cat_ques.csv")  # the 'question' column is read in the next cell

In [4]:
# Give every doctor and every question a deterministic id by hashing its
# natural key (email / question text) with generate_uuid. The new 'id'
# column is written onto the loaded frames in place.
# NOTE(review): generate_uuid calls .encode() on each value, so a missing
# (NaN) email or question would raise here — confirm the CSVs have no blanks.
# NOTE(review): identical emails or question texts map to the same id —
# confirm the inputs are unique.
docs_df['id'] = docs_df['email'].apply(generate_uuid)
questions_df['id'] = questions_df['question'].apply(generate_uuid)

In [5]:
# Column-name constants; the allocation cell uses them as the keys of each
# row it builds.
doctor_id = "doctor_id"
condition_id = "condition_id"
question_id = "question_id"
duration = "duration"

# Full column layout of the doctor/question assignment table, and an empty
# frame carrying that layout.
columns = [doctor_id, condition_id, question_id, duration, "answer", "conversation"]
doctors_question_set = pd.DataFrame(columns=columns)

In [6]:
# Build the assignment table: one row per (doctor, condition, question).
#
# For each condition the questions are dealt round-robin over the current
# doctor order; the doctor list is then rotated by one so that, in the next
# condition, every doctor receives the questions their neighbour had before.
doctor_id_list = docs_df['id'].tolist()
questions_id_list = questions_df['id'].tolist()

# Rows are gathered as plain dicts and turned into a frame once at the end.
# Concatenating a one-row frame per question copies the whole table on every
# iteration, and appending to the frame from the previous cell meant that
# re-running this cell duplicated every row.
question_set_rows = []
for i in range(conditions):
    doctor_allocated_questions = allocate_questions(questions_id_list, doctor_id_list)
    for doctor, allocated_questions in doctor_allocated_questions.items():
        question_set_rows.extend(
            {
                doctor_id: doctor,
                condition_id: i,
                question_id: question,
                duration: 0,          # no time spent yet
                "answer": "",         # filled in later
                "conversation": "",   # filled in later
            }
            for question in allocated_questions
        )
    rotate_list_in_place(doctor_id_list)  # Rotate the doctor list for the next condition

# `columns=` pins the column order and keeps the schema even when no rows
# were produced (e.g. an empty question list).
doctors_question_set = pd.DataFrame(question_set_rows, columns=columns)
doctors_question_set

Unnamed: 0,doctor_id,condition_id,question_id,duration,answer,conversation
0,90015098-3cd2-4fb0-d696-3f7d28e17f72,0,924751af-98d5-64af-7a2e-99be83cd5c90,0,,
1,90015098-3cd2-4fb0-d696-3f7d28e17f72,0,8a9ea3be-d014-69aa-9653-efee46a1f026,0,,
2,a256e6b3-36af-dc38-c564-789c399b516c,0,efff1dc7-4c3f-81d0-ad67-6f2573cb9ce5,0,,
3,a256e6b3-36af-dc38-c564-789c399b516c,0,321523cc-eb36-0f89-18e7-4d087fe11e1f,0,,
4,4ed94076-30eb-1000-c0f6-b63842defa7d,0,07bc93ee-94f4-c0c6-4d8b-818ddc1bec49,0,,
5,4ed94076-30eb-1000-c0f6-b63842defa7d,0,47228332-c260-5c50-77c8-4255a57b9319,0,,
6,826bbc5d-0522-f5f2-0a1d-a4b60fa8c871,0,253a9583-e13e-843c-a6c2-33a712ff4ea5,0,,
7,826bbc5d-0522-f5f2-0a1d-a4b60fa8c871,0,c416d70d-64cf-33f6-a985-04f1ee10e7c7,0,,
8,699a474e-923b-8da5-d7ae-fbfc54a8a2bd,0,663e22e4-2bad-53a2-35be-b9af5a804d7a,0,,
9,699a474e-923b-8da5-d7ae-fbfc54a8a2bd,0,154f340f-34ca-6eea-7170-df44fe19c529,0,,


In [7]:
# Export the doctor and question tables (now including the generated 'id'
# column) as JSON arrays of records in the working directory.
# NOTE(review): docs.json will include the 'email' column — check whether
# that file may be shared or committed.
docs_json = docs_df.to_dict(orient="records")  # NOTE(review): not referenced again in the visible cells
docs_df.to_json("docs.json", orient="records", indent=4)  # `indent=4` for readable formatting
questions_df.to_json("questions.json", orient="records", indent=4) 

In [8]:
# Grouping by doctor_id
doctors_question_set_json = []
for user_id, group in doctors_question_set.groupby("doctor_id"):
    questions_list = group[["question_id", "condition_id"]].to_dict(orient="records")
    doctors_question_set_json.append({
        "user_id": user_id,
        "questions_list": questions_list,
        "progress_id": 0
    })
doctors_question_set_json

[{'user_id': '4ed94076-30eb-1000-c0f6-b63842defa7d',
  'questions_list': [{'question_id': '07bc93ee-94f4-c0c6-4d8b-818ddc1bec49',
    'condition_id': 0},
   {'question_id': '47228332-c260-5c50-77c8-4255a57b9319', 'condition_id': 0},
   {'question_id': '253a9583-e13e-843c-a6c2-33a712ff4ea5', 'condition_id': 1},
   {'question_id': 'c416d70d-64cf-33f6-a985-04f1ee10e7c7', 'condition_id': 1},
   {'question_id': '663e22e4-2bad-53a2-35be-b9af5a804d7a', 'condition_id': 2},
   {'question_id': '154f340f-34ca-6eea-7170-df44fe19c529', 'condition_id': 2}],
  'progress_id': 0},
 {'user_id': '699a474e-923b-8da5-d7ae-fbfc54a8a2bd',
  'questions_list': [{'question_id': '663e22e4-2bad-53a2-35be-b9af5a804d7a',
    'condition_id': 0},
   {'question_id': '154f340f-34ca-6eea-7170-df44fe19c529', 'condition_id': 0},
   {'question_id': '924751af-98d5-64af-7a2e-99be83cd5c90', 'condition_id': 1},
   {'question_id': '8a9ea3be-d014-69aa-9653-efee46a1f026', 'condition_id': 1},
   {'question_id': 'efff1dc7-4c3f-81d0

In [9]:
# Save the per-doctor question sets next to the notebook as indented JSON.
with open("doctor_question_set.json", "w") as f:
    f.write(json.dumps(doctors_question_set_json, indent=4))

In [10]:
from cataract_doc_study.dependency_setup import user_client, survey_client

documents = []
for doctor_question_set in doctors_question_set_json:
    question_set = {
        "_id": doctor_question_set["user_id"],
        "user_id": doctor_question_set["user_id"],
        "questions_list": doctor_question_set["questions_list"],
        "progress_id": doctor_question_set["progress_id"],
    }
    documents.append(question_set)
await user_client.ainsert(documents)


  delegate = self.__delegate_class__(*args, **kwargs)


(['4ed94076-30eb-1000-c0f6-b63842defa7d',
  '699a474e-923b-8da5-d7ae-fbfc54a8a2bd',
  '826bbc5d-0522-f5f2-0a1d-a4b60fa8c871',
  '90015098-3cd2-4fb0-d696-3f7d28e17f72',
  'a256e6b3-36af-dc38-c564-789c399b516c'],
 None)

In [11]:
# Teardown cell: calls adelete_collection() on both the survey and the user
# client, which presumably removes everything stored through them.
# WARNING(review): this runs unconditionally, so a Restart & Run All would
# wipe the documents inserted by the previous cell right after writing them.
# Run it only when a reset is actually intended.
# The import repeats the one in the insert cell so this cell can be run on
# its own.
from cataract_doc_study.dependency_setup import user_client, survey_client
await survey_client.adelete_collection()
await user_client.adelete_collection()