In [None]:
import random
import pandas as pd
import spacy
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

# Mock knowledge base: each symptom maps to the diseases it may indicate
symptom_disease_data = {
    "fever": ["Common Cold", "Influenza"],
    "cough": ["Bronchitis", "COVID-19"],
    "headache": ["Migraine", "Common Cold"],
    "fatigue": ["Anemia", "COVID-19"],
    "muscle pain": ["Influenza", "COVID-19"],
    "sore throat": ["Strep Throat", "Common Cold"],
    "shortness of breath": ["Asthma", "COVID-19"],
    "loss of taste or smell": ["COVID-19"],
    "nausea": ["Food poisoning", "COVID-19"],
    "diarrhea": ["Gastroenteritis", "COVID-19"]
}

# Flatten the mapping into a long-format DataFrame with one row per
# (symptom, disease) pair; both columns walk the dict in the same order,
# so row k of 'Symptom' lines up with row k of 'Disease'.
symptom_df = pd.DataFrame({
    'Symptom': [name for name, linked in symptom_disease_data.items() for _ in linked],
    'Disease': [illness for linked in symptom_disease_data.values() for illness in linked],
})

# Mock healthcare provider database with ratings and appointments.
# Keyed by doctor name; every record carries the same five fields:
#   Specialty    - department name (compared against the 'Department' values
#                  of the disease mapping when doctors are looked up)
#   Ratings      - numeric rating
#   Appointments - appointment count; recommend_doctors sorts by it and
#                  increments it for the doctor it picks
#   Working Days - list of weekday names
#   Shift        - "Morning" or "Afternoon"
healthcare_providers_data = {
    "Dr. John Smith": {"Specialty": "Internal Medicine", "Ratings": 4.5, "Appointments": 10, "Working Days": ["Monday", "Wednesday"], "Shift": "Morning"},
    "Dr. Michael Brown": {"Specialty": "Internal Medicine", "Ratings": 4.2, "Appointments": 15, "Working Days": ["Tuesday", "Thursday"], "Shift": "Afternoon"},
    "Dr. Sarah Lee": {"Specialty": "Pulmonology", "Ratings": 4.8, "Appointments": 20, "Working Days": ["Monday", "Wednesday"], "Shift": "Afternoon"},
    "Dr. David Martinez": {"Specialty": "Pulmonology", "Ratings": 4.6, "Appointments": 5, "Working Days": ["Tuesday", "Thursday"], "Shift": "Morning"},
    "Dr. Emily Johnson": {"Specialty": "Infectious Diseases", "Ratings": 4.7, "Appointments": 18, "Working Days": ["Monday", "Wednesday"], "Shift": "Morning"},
    "Dr. Kevin Wang": {"Specialty": "Infectious Diseases", "Ratings": 4.9, "Appointments": 12, "Working Days": ["Tuesday", "Thursday"], "Shift": "Afternoon"},
    "Dr. Jessica Chen": {"Specialty": "Neurology", "Ratings": 4.4, "Appointments": 22, "Working Days": ["Monday", "Wednesday"], "Shift": "Afternoon"},
    "Dr. Ryan Kim": {"Specialty": "Neurology", "Ratings": 4.3, "Appointments": 8, "Working Days": ["Tuesday", "Thursday"], "Shift": "Morning"},
    "Dr. Sophia Jones": {"Specialty": "Hematology", "Ratings": 4.6, "Appointments": 25, "Working Days": ["Monday", "Wednesday"], "Shift": "Morning"},
    "Dr. William Garcia": {"Specialty": "Hematology", "Ratings": 4.8, "Appointments": 10, "Working Days": ["Tuesday", "Thursday"], "Shift": "Afternoon"},
    "Dr. Rachel Patel": {"Specialty": "Otorhinolaryngology", "Ratings": 4.2, "Appointments": 20, "Working Days": ["Monday", "Wednesday"], "Shift": "Afternoon"},
    "Dr. Mohammad Khan": {"Specialty": "Otorhinolaryngology", "Ratings": 4.5, "Appointments": 15, "Working Days": ["Tuesday", "Thursday"], "Shift": "Morning"},
    "Dr. Lisa Miller": {"Specialty": "Gastroenterology", "Ratings": 4.9, "Appointments": 30, "Working Days": ["Monday", "Wednesday"], "Shift": "Morning"},
    "Dr. Andrew Taylor": {"Specialty": "Gastroenterology", "Ratings": 4.7, "Appointments": 20, "Working Days": ["Tuesday", "Thursday"], "Shift": "Afternoon"}
}

# Convert healthcare_providers_data to DataFrame.
# The dict-of-dicts constructor puts one doctor per column, so the frame is
# transposed to get one row per doctor (indexed by doctor name) with the
# record fields as columns.
healthcare_providers_df = pd.DataFrame(healthcare_providers_data).T

# Which hospital department handles each disease
disease_department_mapping_data = {
    "Common Cold": "Internal Medicine",
    "Influenza": "Internal Medicine",
    "Bronchitis": "Pulmonology",
    "COVID-19": "Infectious Diseases",
    "Migraine": "Neurology",
    "Anemia": "Hematology",
    "Strep Throat": "Otorhinolaryngology",
    "Asthma": "Pulmonology",
    "Food poisoning": "Gastroenterology",
    "Gastroenteritis": "Gastroenterology"
}

# Two-column lookup table: keys become 'Disease', values become 'Department'
disease_department_mapping_df = pd.DataFrame({
    'Disease': list(disease_department_mapping_data.keys()),
    'Department': list(disease_department_mapping_data.values()),
})

# Function to generate a mock dataset
def generate_mock_dataset(num_samples):
    """Generate random (symptoms, disease) training records.

    Each record picks a random non-empty subset of the known symptoms and
    labels it with one disease chosen uniformly among the distinct diseases
    linked to any of those symptoms in ``symptom_disease_data``.

    Args:
        num_samples: Number of records to generate.

    Returns:
        A list of dicts with keys ``'Symptoms'`` (the chosen symptoms joined
        with ``', '``) and ``'Disease'``.
    """
    # random.sample() needs a real sequence: passing the dict's keys view
    # triggers a DeprecationWarning on Python 3.9/3.10 and a TypeError on 3.11+.
    known_symptoms = list(symptom_disease_data)
    mock_data = []
    for _ in range(num_samples):
        symptoms = random.sample(known_symptoms, random.randint(1, len(known_symptoms)))
        # Sorting the distinct candidates keeps a seeded run reproducible;
        # plain set iteration order varies between interpreter runs.
        candidates = sorted({
            disease
            for symptom in symptoms
            for disease in symptom_disease_data[symptom]
        })
        mock_data.append({'Symptoms': ', '.join(symptoms), 'Disease': random.choice(candidates)})
    return mock_data

# Function to train the classifier
def train_classifier(X, y):
    """Fit a bag-of-words logistic-regression model.

    Args:
        X: Iterable of symptom description strings.
        y: Disease label for each entry of ``X``.

    Returns:
        A ``(classifier, vectorizer)`` tuple: the fitted LogisticRegression
        and the fitted CountVectorizer needed to encode new text for it.
    """
    bag_of_words = CountVectorizer()
    model = LogisticRegression()
    # Learn the vocabulary and encode the training text in a single pass,
    # then fit the model on the resulting count matrix.
    model.fit(bag_of_words.fit_transform(X), y)
    return model, bag_of_words

# Function to classify user input
def classify_user_input(user_input, classifier, vectorizer):
    """Predict a disease label for one free-text symptom description.

    Args:
        user_input: The user's symptom description.
        classifier: Fitted model exposing ``predict``.
        vectorizer: Fitted vectorizer exposing ``transform``.

    Returns:
        The first (and only) prediction for ``user_input``.
    """
    # The vectorizer works on a batch of documents, so wrap the single
    # string in a list and unwrap the single prediction afterwards.
    return classifier.predict(vectorizer.transform([user_input]))[0]

# Lazily built spaCy objects shared by every extract_symptoms() call
_SYMPTOM_MATCHER_CACHE = {}


def _get_symptom_matcher():
    """Return the ``(nlp, matcher)`` pair, building it on first use."""
    if not _SYMPTOM_MATCHER_CACHE:
        from spacy.matcher import PhraseMatcher

        # A blank English pipeline is enough: only the tokenizer is needed,
        # so no pretrained model has to be downloaded or loaded.
        nlp = spacy.blank('en')
        # attr='LOWER' makes the phrase comparison case-insensitive.
        matcher = PhraseMatcher(nlp.vocab, attr='LOWER')
        matcher.add('SYMPTOM', [nlp.make_doc(symptom) for symptom in symptom_disease_data])
        _SYMPTOM_MATCHER_CACHE['nlp'] = nlp
        _SYMPTOM_MATCHER_CACHE['matcher'] = matcher
    return _SYMPTOM_MATCHER_CACHE['nlp'], _SYMPTOM_MATCHER_CACHE['matcher']


# Function to extract symptoms from user input using spaCy
def extract_symptoms(user_input):
    """Return the known symptoms mentioned in ``user_input``.

    The previous implementation filtered named entities for a ``'SYMPTOM'``
    label, which the stock ``en_core_web_sm`` model never produces, so it
    always returned an empty list (and reloaded the model on every call).
    Symptoms are now found by phrase-matching the text against the keys of
    ``symptom_disease_data``.

    Args:
        user_input: Free-text symptom description.

    Returns:
        The matched phrases as they appear in ``user_input``, without
        duplicates, in the order the matcher reports them.
    """
    nlp, matcher = _get_symptom_matcher()
    doc = nlp.make_doc(user_input)
    symptoms = []
    for _match_id, start, end in matcher(doc):
        phrase = doc[start:end].text
        if phrase not in symptoms:
            symptoms.append(phrase)
    return symptoms

# Function to find doctors for a given department
def find_doctors_for_department(department):
    """Select the providers whose specialty equals ``department``.

    Args:
        department: Department name to compare with the 'Specialty' column.

    Returns:
        The matching rows of ``healthcare_providers_df`` (an empty frame
        when no provider has that specialty).
    """
    in_department = healthcare_providers_df['Specialty'] == department
    return healthcare_providers_df.loc[in_department]

def _doctor_matches_preferences(doctor, user_preferences):
    """Return True when ``doctor`` works a preferred day on the preferred shift."""
    preferred_days = user_preferences.get("preferred_days", [])
    preferred_shift = user_preferences.get("preferred_shift", "")
    if preferred_shift.lower() != doctor['Shift'].lower():
        return False
    return any(day in doctor['Working Days'] for day in preferred_days)


# Function to recommend doctors based on the department of the predicted disease and schedule appointments
def recommend_doctors(predicted_disease, user_preferences):
    """Recommend the least-booked suitable doctor for ``predicted_disease``.

    The disease is mapped to a department, and that department's doctors are
    ranked by their current appointment count. Doctors whose working days and
    shift fit ``user_preferences`` are preferred; previously the preferences
    were ignored, so a doctor the user could never book was recommended (and
    had their count bumped) even when a colleague matched. If no doctor
    matches, or no preferences are given, the least-booked doctor of the
    department is returned as before.

    Args:
        predicted_disease: Disease name to look up in the department mapping.
        user_preferences: Dict with optional ``"preferred_days"`` (list of
            weekday names) and ``"preferred_shift"`` (str); may be empty or
            ``None``.

    Returns:
        The chosen doctor's row as a Series (its ``name`` is the doctor's
        name), taken before the count is incremented, or ``None`` when the
        disease has no mapped department or the department has no doctors.

    Side effects:
        Increments the chosen doctor's 'Appointments' in
        ``healthcare_providers_df``.
    """
    departments = disease_department_mapping_df.loc[
        disease_department_mapping_df['Disease'] == predicted_disease, 'Department'
    ]
    if departments.empty:
        return None

    doctors = find_doctors_for_department(departments.iloc[0])
    if doctors.empty:
        return None
    doctors = doctors.sort_values(by=['Appointments'])

    # Narrow to doctors the user can actually book, keeping the sort order;
    # fall back to the whole department when nobody fits.
    preferences = user_preferences or {}
    matches = [
        _doctor_matches_preferences(doctor, preferences)
        for _, doctor in doctors.iterrows()
    ]
    if any(matches):
        doctors = doctors.loc[matches]

    recommended_doctor = doctors.iloc[0]
    # Increment appointment number for the recommended doctor
    healthcare_providers_df.at[recommended_doctor.name, 'Appointments'] += 1
    return recommended_doctor

# Function to schedule an appointment for the user with the recommended doctor
def schedule_appointment(user, recommended_doctor, user_preferences):
    """Book ``user`` with ``recommended_doctor`` if the preferences fit.

    A slot fits when at least one preferred day is among the doctor's
    working days and the preferred shift equals the doctor's shift
    (the shift comparison ignores case).

    Args:
        user: Dict describing the user; gains an ``'Appointment'`` key on
            success.
        recommended_doctor: Mapping-like record with 'Working Days',
            'Shift' and 'Appointments' entries.
        user_preferences: Dict with optional ``"preferred_days"`` and
            ``"preferred_shift"`` entries.

    Returns:
        True if an appointment was recorded on ``user``, otherwise False.
    """
    wanted_days = user_preferences.get("preferred_days", [])
    wanted_shift = user_preferences.get("preferred_shift", "")
    doctor_days = recommended_doctor['Working Days']
    doctor_shift = recommended_doctor['Shift']

    slot_found = any(
        day in doctor_days and wanted_shift.lower() == doctor_shift.lower()
        for day in wanted_days
    )
    if not slot_found:
        return False

    # Record the doctor's appointment count as the user's appointment number
    user['Appointment'] = recommended_doctor['Appointments']
    return True

# Example usage
# Build a mock training set of 100 samples and load it into a DataFrame
mock_dataset = generate_mock_dataset(100)
mock_df = pd.DataFrame(mock_dataset)

# Features are the symptom strings, labels are the diseases
X = mock_df['Symptoms']
y = mock_df['Disease']

# Fit the classifier together with its vectorizer
classifier, vectorizer = train_classifier(X, y)

# Every simulated user shares the same scheduling preferences
user_preferences = {
    "preferred_days": ["Monday", "Wednesday"],
    "preferred_shift": "Morning"
}

# Run the interactive flow once per simulated user
num_users = 3
for i in range(num_users):
    print(f"User {i + 1}:")
    # Symptoms are read from stdin and lower-cased
    user_input = input("Please describe your symptoms: ").lower()

    # Symptom phrases found in the input (not used further below)
    user_symptoms = extract_symptoms(user_input)

    # Predict the disease from the raw text
    predicted_disease = classify_user_input(user_input, classifier, vectorizer)
    print("Predicted disease based on symptoms:", predicted_disease)

    # Pick a doctor from the predicted disease's department
    recommended_doctor = recommend_doctors(predicted_disease, user_preferences)
    if recommended_doctor is None:
        print("No doctors available for the predicted disease department.")
    else:
        print("Recommended doctor:")
        print(recommended_doctor)
        # Try to book the user with that doctor
        user = {'Name': f"User {i + 1}"}
        scheduled = schedule_appointment(user, recommended_doctor, user_preferences)
        if not scheduled:
            print("No available appointments matching user preferences.")
        else:
            print("Appointment scheduled successfully.")
            print("User details with appointment:")
            print(user)
    print()


since Python 3.9 and will be removed in a subsequent version.
  symptoms = random.sample(symptom_disease_data.keys(), random.randint(1, len(symptom_disease_data)))


User 1:
Please describe your symptoms: i have fever, headache
Predicted disease based on symptoms: COVID-19
Recommended doctor:
Specialty       Infectious Diseases
Ratings                         4.9
Appointments                     12
Working Days    [Tuesday, Thursday]
Shift                     Afternoon
Name: Dr. Kevin Wang, dtype: object
No available appointments matching user preferences.

User 2:
Please describe your symptoms: i have fever
Predicted disease based on symptoms: Influenza
Recommended doctor:
Specialty         Internal Medicine
Ratings                         4.5
Appointments                     10
Working Days    [Monday, Wednesday]
Shift                       Morning
Name: Dr. John Smith, dtype: object
Appointment scheduled successfully.
User details with appointment:
{'Name': 'User 2', 'Appointment': 10}

User 3:
