In [18]:
import json
import os
import re
import sys

import pandas as pd
from langchain.schema import Document

sys.path.append(os.path.abspath("D:\\ImprovedScreening\\"))

# now imports will work
from src.screening.llm import fireworks_llm
from src.screening.prompts import USER_PROMPT_TEMPLATE
from helpers import Inquiry, Doctor

In [19]:
# Load the 'KSA' sheet of the doctors workbook and keep only the columns that
# the rest of the notebook reads.
skills_df = pd.read_excel("D:\\ImprovedScreening\\assets\\Drs Data.xlsx", sheet_name= 'KSA')
skills_df = skills_df[
    ['Title', 'Specialty', 'SubSpecialty', 'Scope of Service', 'Degree']
]

# Remove every ";#<digits>" fragment from the three label columns.
# assign() builds a new frame instead of writing into the column subset taken
# above, which avoids pandas' SettingWithCopyWarning, and the pattern is
# spelled out once instead of three times.
skills_df = skills_df.assign(
    **{
        label_col: skills_df[label_col].str.replace(r";#\d+", "", regex=True)
        for label_col in ("Specialty", "SubSpecialty", "Degree")
    }
)

skills_df.head()

Unnamed: 0,Title,Specialty,SubSpecialty,Scope of Service,Degree
0,Abdelhameed Gamal Abdelhameed Ibrahim,Dental Services,Dental Services,\n● Treating children’s teeth under general an...,Specialist
1,AbdElRahman Fouad AbdElRahman Bouges,Neurology,Neurology,Neurological diseases\nHeadaches\nStroke\nEpil...,Consultant
2,Abdullah Alsaggaf,Dental Services,Screening,Dental examination\nGear cleaning\nTeeth white...,Specialist
3,ABdulrahim Alshehri,Neurology,Neurology,Diagnosis and management of brain disorders li...,Consultant
4,Abdulrahman Abu Hawi,E.N.T.,E.N.T.,\tSeptumplasty. Rhinoplasty. (open/close)\n\...,Consultant


In [20]:
# Distinct specialty labels, in order of first appearance, offered to the LLM
# as the only valid choices. Rows with no specialty are dropped first so that a
# NaN never shows up in the prompt's list of options.
specialties = skills_df['Specialty'].dropna().unique().tolist()

In [21]:
# System prompt for the first LLM call: pick one specialty from `specialties`
# plus a diagnosis, answered as JSON.
# The schema is serialised with json.dumps so the prompt shows real JSON
# (double quotes, true/false/null) rather than the repr of a Python dict,
# which contradicts the "strictly as JSON" instruction.
SYSTEM_PROMPT = f"""
You are a physician who provides medical recommendations for patients based on patient data.
You have to recommend the most suitable specialty and diagnosis to the patient based on his info and family history.
Only choose from this list of available specialties {specialties}.

Return the output strictly as JSON that matches this schema:
{json.dumps(Inquiry.model_json_schema(), indent=2)}
"""

In [22]:
# Sample patient record used to fill USER_PROMPT_TEMPLATE.
# The "alcohol" and "caff" values were swapped in the original sample (the
# coffee intake was stored under "alcohol"); each is now under its own key.
# NOTE(review): "meds": "OCCASIONAL" also looks like a placeholder — confirm.
patient_data = {
    "gender": "male",
    "age": 45,
    "marital_status": "Single",
    "smoke": "Yes, 10 cigarettes/day",
    "alcohol": "OCCASIONAL",
    "caff": "2 CUPS COFFE/DAY",
    "meds": "OCCASIONAL",
    "symptoms": "Chest Pain",
    "allergies": "Penicillin",
    # Personal and family medical history, one entry per relative.
    "patient": "Diabetes",
    "father": "Heart Disease",
    "mother": "None",
    "grandparent": "Hypertension",
    "sibling": "Asthma",
    "children": "Healthy",
}

# Fill the project's user-prompt template with the patient record; every
# patient_data key is passed to str.format as a named field.
user_prompt = USER_PROMPT_TEMPLATE.format(**patient_data)

In [23]:
# First LLM call: patient prompt + specialty/diagnosis system prompt.
# NOTE(review): `_call` is underscore-prefixed (private by convention) — confirm
# whether fireworks_llm exposes a public entry point that should be used instead.
recommendation = fireworks_llm._call(user_prompt=user_prompt, system_prompt=SYSTEM_PROMPT)

In [24]:
# Display the raw reply of the first call before any clean-up.
recommendation

'```json\n{"SPECIALTY": "Cardiology", "DIAGNOSIS": "Possible Coronary Artery Disease"}\n```'

In [25]:
def clean_json_output(text: str) -> str:
    """Strip a surrounding Markdown code fence from an LLM reply.

    Leading and trailing whitespace is removed first. An opening fence line
    (three backticks, optionally followed by ``json`` in any letter case and
    trailing blanks) and a closing fence line are then dropped. Both LF and
    CRLF line endings are accepted. Text that carries no fence is returned
    stripped but otherwise unchanged.

    Args:
        text: Raw model output, with or without a code fence around it.

    Returns:
        The text between the fences, ready to be handed to a JSON parser.
    """
    payload = text.strip()
    # Opening fence. The previous pattern required exactly "```json\n" or
    # "```\n", so "```JSON", "```json " or a CRLF ending left the fence in
    # place and the JSON parser then failed.
    payload = re.sub(
        r"\A```[ \t]*(?:json)?[ \t]*\r?\n", "", payload, flags=re.IGNORECASE
    )
    # Closing fence on the last line, possibly indented, LF or CRLF before it.
    payload = re.sub(r"\r?\n[ \t]*```\Z", "", payload)
    return payload

# Drop the code fence from the first reply ...
cleaned = clean_json_output(recommendation)

# ... then parse and validate it against the Inquiry model in one step.
inquiry = Inquiry.model_validate_json(cleaned)

In [26]:
# Keep only the doctors whose specialty equals the one the LLM picked.
filtered_doctors = skills_df[skills_df['Specialty'] == inquiry.SPECIALTY]

# Fail fast when nothing matches: otherwise the next prompt would contain an
# empty doctor list while still demanding three recommendations.
assert not filtered_doctors.empty, (
    f"No doctors listed under specialty {inquiry.SPECIALTY!r}; "
    "the LLM may have returned a value outside `specialties`."
)
filtered_doctors.shape

(8, 5)

In [27]:
# Display the doctors that matched the recommended specialty.
filtered_doctors

Unnamed: 0,Title,Specialty,SubSpecialty,Scope of Service,Degree
16,Ahmed Gaber,Cardiology,Cardiology,Treatment of cardiovascular diseases:\nHyperte...,Specialist
23,Ahmed Mokhtar,Cardiology,Cardiology,Pacemaker implantation\nImplantable Cardiovert...,Consultant
98,Karim fahmy,Cardiology,Cardiology,1- Adult Cardiac Case Examination\n2- Echocard...,Consultant
111,Mahmoud Sabbah,Cardiology,Interventional Cardiology,1- patients with cardiovascular and coronary d...,Consultant
140,Mohamed Maraghi,Cardiology,Cardiology,"""Arrhythmia\nRheumatic heart disease\nHeart va...",Consultant
155,Mustafa Zaitouni,Cardiology,Cardiology,". Palpitation\n. Syncope, loss of consciousnes...",Consultant
218,tamer Hikal,Cardiology,Cardiology,Adult cardiac surgery :\n*On pump CABG\n*Valve...,Consultant
330,Mohamed AbdElFattah Zaki AbdElFattah,Cardiology,Cardiology,,Specialist


In [28]:
def format_doctors(df, missing="not specified"):
    """Render one text line per doctor for inclusion in an LLM prompt.

    Each line has the form
    ``<Title> - <Degree> in <SubSpecialty> - <Scope of Service>``.

    Args:
        df: DataFrame with the columns 'Title', 'Degree', 'SubSpecialty' and
            'Scope of Service'.
        missing: Text substituted for a missing (NaN/None) cell, so the prompt
            never contains the literal string "nan".

    Returns:
        The lines joined with newlines; an empty string for an empty frame.
    """
    records = df[["Title", "Degree", "SubSpecialty", "Scope of Service"]].itertuples(
        index=False, name=None
    )
    lines = []
    for record in records:
        # Swap missing cells for the placeholder before formatting.
        title, degree, sub_specialty, scope = (
            missing if pd.isna(cell) else cell for cell in record
        )
        lines.append(f"{title} - {degree} in {sub_specialty} - {scope}")
    return "\n".join(lines)

# Text listing of the matched doctors, one line each, for the second prompt.
doctors_data = format_doctors(filtered_doctors)

In [29]:
# System prompt for the second LLM call: rank the matched doctors.
# Two fixes compared with the earlier wording:
#   * the output section now says the reply must be a JSON array of three
#     objects — the schema below describes ONE doctor, while the code that
#     consumes the reply iterates over a list;
#   * the schema is serialised with json.dumps so it is shown as real JSON
#     rather than the repr of a Python dict.
SECOND_SYSTEM_PROMPT = f"""
You are a physician providing medical recommendations for patients based on their clinical data and family history.  

You are given a list of available doctors:  
{doctors_data}  

Your task:  
- Select **exactly the top 3 doctors** whose scope of services best matches the patient’s data and family history.  
- Do not return more or fewer than 3 doctors.  

Output format:  
Return the recommendations **strictly as valid JSON**: a JSON array of exactly 3 objects, each following this schema:  
{json.dumps(Doctor.model_json_schema(), indent=2)}
"""

In [30]:
# Second LLM call: same patient prompt, now with the doctor-ranking system prompt.
# NOTE(review): `_call` is underscore-prefixed (private by convention) — confirm
# whether fireworks_llm exposes a public entry point that should be used instead.
doctor_recommendation = fireworks_llm._call(user_prompt=user_prompt, system_prompt=SECOND_SYSTEM_PROMPT)

In [31]:
# Display the raw reply of the second call before any clean-up.
doctor_recommendation

'```json\n[\n    {\n        "NAME": "Ahmed Gaber",\n        "DEGREE": "Specialist in Cardiology",\n        "SCOPE_OF_SERVICE": [\n            "Hypertension",\n            "Arteriosclerosis",\n            "Heart rhythm disturbances",\n            "Rheumatic heart disease",\n            "Heart valve disease",\n            "ECG",\n            "24-hour electrocardiogram (holter)",\n            "Sound waves on the heart",\n            "Treatment of heart failure",\n            "Treatment of chronic"\n        ],\n        "GENDER": "Male"\n    },\n    {\n        "NAME": "Mahmoud Sabbah",\n        "DEGREE": "Consultant in Interventional Cardiology",\n        "SCOPE_OF_SERVICE": [\n            "patients with cardiovascular and coronary diseases",\n            "management of patients with acute coronary syndrome and heart attacks",\n            "Coronary interventions and Stents for acute and chronic coronary artery disease",\n            "treating complex coronary artery disease"\n        ],\n 

In [32]:
# Strip the code fence from the doctor-ranking reply. This reuses the
# clean_json_output helper defined in the earlier cell; the copy that used to
# be redefined here was byte-identical and only shadowed that definition.
cleaned_doctor = clean_json_output(doctor_recommendation)

In [33]:
import json
# Preview the parsed reply as plain Python objects (no model validation yet).
json.loads(cleaned_doctor)

[{'NAME': 'Ahmed Gaber',
  'DEGREE': 'Specialist in Cardiology',
  'SCOPE_OF_SERVICE': ['Hypertension',
   'Arteriosclerosis',
   'Heart rhythm disturbances',
   'Rheumatic heart disease',
   'Heart valve disease',
   'ECG',
   '24-hour electrocardiogram (holter)',
   'Sound waves on the heart',
   'Treatment of heart failure',
   'Treatment of chronic'],
  'GENDER': 'Male'},
 {'NAME': 'Mahmoud Sabbah',
  'DEGREE': 'Consultant in Interventional Cardiology',
  'SCOPE_OF_SERVICE': ['patients with cardiovascular and coronary diseases',
   'management of patients with acute coronary syndrome and heart attacks',
   'Coronary interventions and Stents for acute and chronic coronary artery disease',
   'treating complex coronary artery disease'],
  'GENDER': 'Male'},
 {'NAME': 'Mohamed Maraghi',
  'DEGREE': 'Consultant in Cardiology',
  'SCOPE_OF_SERVICE': ['Arrhythmia',
   'Rheumatic heart disease',
   'Heart valve diseases',
   'Coronary artery disease (heart attack - angina)',
   'Heart fai

In [34]:
import json

# Parse the cleaned reply and build one Doctor instance per entry, passing the
# entry's keys as keyword arguments (Doctor is presumably a pydantic model, so
# construction should reject entries that do not fit — confirm in helpers).
validated_doctors = [Doctor(**entry) for entry in json.loads(cleaned_doctor)]

In [35]:
# Display the validated Doctor objects.
validated_doctors

[Doctor(NAME='Ahmed Gaber', DEGREE='Specialist in Cardiology', SCOPE_OF_SERVICE=['Hypertension', 'Arteriosclerosis', 'Heart rhythm disturbances', 'Rheumatic heart disease', 'Heart valve disease', 'ECG', '24-hour electrocardiogram (holter)', 'Sound waves on the heart', 'Treatment of heart failure', 'Treatment of chronic'], GENDER='Male'),
 Doctor(NAME='Mahmoud Sabbah', DEGREE='Consultant in Interventional Cardiology', SCOPE_OF_SERVICE=['patients with cardiovascular and coronary diseases', 'management of patients with acute coronary syndrome and heart attacks', 'Coronary interventions and Stents for acute and chronic coronary artery disease', 'treating complex coronary artery disease'], GENDER='Male'),
 Doctor(NAME='Mohamed Maraghi', DEGREE='Consultant in Cardiology', SCOPE_OF_SERVICE=['Arrhythmia', 'Rheumatic heart disease', 'Heart valve diseases', 'Coronary artery disease (heart attack - angina)', 'Heart failure and myocardial hypertrophy', 'Treatment of acute coronary artery thrombose