In [7]:
import re
import sqlite3
import joblib
import numpy as np
import pandas as pd
from datetime import datetime

# Raw string: the Windows path contains backslash sequences (\S, \P, \d, \s)
# that a normal string literal reports as invalid escapes (SyntaxWarning).
# The resulting path is character-for-character the same as before.
specialists_df = pd.read_csv(r'C:\SUDHA\Personal portfolio project\datasets\specialists_database_extended.csv')

# Database Setup
def initialize_database():
    """Create the ``patients`` table in ``patient_history.db`` if it does not exist.

    The database file is opened relative to the current working directory.
    Calling this function repeatedly is safe because the statement uses
    ``CREATE TABLE IF NOT EXISTS``.
    """
    conn = sqlite3.connect('patient_history.db')
    try:
        cursor = conn.cursor()
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS patients (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT,
            age INTEGER,
            gender TEXT,
            symptoms TEXT,
            detailed_info TEXT,
            diagnosis TEXT,
            date TEXT
        )
    ''')
        conn.commit()
    finally:
        # Release the database handle even when the statement or commit fails.
        conn.close()

def save_patient_history(name, age, gender, symptoms, detailed_info, diagnosis, date):
    """Insert one consultation record into the ``patients`` table.

    All values are bound through ``?`` placeholders, so they are stored as
    data and never interpreted as SQL.

    Args:
        name: Patient name.
        age: Patient age.
        gender: Patient gender.
        symptoms: Text describing the reported symptoms.
        detailed_info: Text with any additional details.
        diagnosis: Text describing the diagnosis.
        date: Text timestamp of the consultation.

    Raises:
        sqlite3.Error: If the insert fails (for example, the table is missing).
    """
    conn = sqlite3.connect('patient_history.db')
    try:
        cursor = conn.cursor()
        cursor.execute('''
        INSERT INTO patients (name, age, gender, symptoms, detailed_info, diagnosis, date)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    ''', (name, age, gender, symptoms, detailed_info, diagnosis, date))
        conn.commit()
    finally:
        # Release the database handle even when the insert or commit fails.
        conn.close()

# Load Models
def load_models():
    """Load the four pickled classifiers from disk.

    Returns:
        dict: Maps "diabetes", "cancer", "heart_disease" and "covid19" to the
        object returned by ``joblib.load`` for the corresponding file.
    """
    model_paths = {
        "diabetes": r"C:\SUDHA\Personal portfolio project\Models\diabetes_rf_model.pkl",
        "cancer": r"C:\SUDHA\Personal portfolio project\Models\cancer_rf_model.pkl",
        "heart_disease": r"C:\SUDHA\Personal portfolio project\Models\heart_disease_rf_model_multiclass.pkl",
        "covid19": r"C:\SUDHA\Personal portfolio project\Models\covid_rf_model.pkl",
    }
    loaded = {}
    for disease_key, pickle_path in model_paths.items():
        loaded[disease_key] = joblib.load(pickle_path)
    return loaded

# Loaded once at module level; the predict_* functions look models up here by key.
models = load_models()

def recommend_specialists_with_fallback(disease, pincode, city=None, top_n=5):
    """Return up to ``top_n`` specialists for ``disease``, widening the area as needed.

    Rows are taken from the module-level ``specialists_df`` and ranked by the
    'Yelp Ratings' column, highest first. The search widens in tiers:

    1. Rows whose pincode equals ``pincode``; returned if there are at least
       ``top_n`` of them.
    2. Rows whose pincode shares its first three characters with ``pincode``,
       merged with tier 1; returned if the merged set has at least ``top_n``.
    3. If ``city`` is given, rows in that city (case-insensitive), merged
       with tiers 1 and 2; returned whenever any city row exists.
    4. Otherwise, whatever the earlier tiers found.

    Args:
        disease: Disease name, compared case-insensitively with 'Disease'.
        pincode: Pincode as a string or int; it is converted to ``str``.
        city: Optional city name; ``None`` or an empty string skips tier 3.
        top_n: Maximum number of rows to return.

    Returns:
        pandas.DataFrame: The matching rows, or a column-less empty DataFrame
        when nothing matched.

    Note:
        As a side effect, the 'Pincode' column of ``specialists_df`` is
        converted to strings in place.
    """
    # Ensure Pincode is treated as a string
    specialists_df['Pincode'] = specialists_df['Pincode'].astype(str)
    pincode = str(pincode)

    disease_mask = specialists_df['Disease'].str.lower() == disease.lower()

    # Exact match
    exact_match = specialists_df[disease_mask & (specialists_df['Pincode'] == pincode)]

    if not exact_match.empty:
        # Sort by Yelp Ratings
        exact_match = exact_match.sort_values(by='Yelp Ratings', ascending=False)
        if len(exact_match) >= top_n:
            return exact_match.head(top_n)

    # Fallback: nearby pincodes, i.e. those sharing the first 3 characters
    nearby_match = specialists_df[
        disease_mask & specialists_df['Pincode'].str.startswith(pincode[:3])
    ]

    if not nearby_match.empty:
        # Combine exact and nearby matches
        combined_matches = pd.concat([exact_match, nearby_match]).drop_duplicates()
        combined_matches = combined_matches.sort_values(by='Yelp Ratings', ascending=False)
        if len(combined_matches) >= top_n:
            return combined_matches.head(top_n)

    # Start with an empty slice so the final fallback below can always
    # concatenate it; previously the name was unbound when no city was given,
    # which raised UnboundLocalError instead of returning partial results.
    city_match = specialists_df.iloc[0:0]

    # Fallback: City level
    if city:
        city_match = specialists_df[
            disease_mask & (specialists_df['City'].str.lower() == city.lower())
        ]

        if not city_match.empty:
            # Combine exact, nearby, and city matches
            combined_matches = pd.concat([exact_match, nearby_match, city_match]).drop_duplicates()
            combined_matches = combined_matches.sort_values(by='Yelp Ratings', ascending=False)
            return combined_matches.head(top_n)

    # If no sufficient matches, return what is available
    fallback = pd.concat([exact_match, nearby_match, city_match]).drop_duplicates()
    fallback = fallback.sort_values(by='Yelp Ratings', ascending=False)
    return fallback.head(top_n) if not fallback.empty else pd.DataFrame()

def recommend_specialists(disease):
    """Ask for the user's pincode and city, then print specialists for ``disease``.

    The lookup is delegated to ``recommend_specialists_with_fallback``. Each
    returned row is printed as a short contact card; if nothing is returned,
    an apology message is printed instead.
    """
    user_pincode = input("Please provide your pincode: ")
    user_city = input("What city are you located in? ")
    matches = recommend_specialists_with_fallback(disease, user_pincode, user_city)

    # Guard clause: nothing to list.
    if matches.empty:
        print("\nSorry, no specialists found for your location. Please try another pincode or city.")
        return

    print("\nHere are some specialists near you:")
    divider = "-" * 40
    for _, doctor in matches.iterrows():
        print(f"\nName: {doctor['Name']}")
        print(f"Specialization: {doctor['Specialization']}")
        print(f"Address: {doctor['Address']}")
        print(f"Contact: {doctor['Contact']}")
        print(f"Yelp Ratings: {doctor['Yelp Ratings']}")
        print(f"Additional Info: {doctor['Additional Info']}")
        print(divider)

# Greeting and User Information
def greet_user():
    """Print the welcome banner and collect the user's basic details.

    Returns:
        tuple: ``(name, age, gender)`` exactly as typed by the user; all
        three are unconverted strings.
    """
    print(
        "Welcome to the Medical Diagnosis Chatbot!",
        "I'm here to assist you in identifying potential health concerns.\n",
        sep="\n",
    )
    questions = (
        "To begin, may I have your name? ",
        "How old are you? ",
        "What is your gender? (Male/Female/Other): ",
    )
    # The generator is consumed left to right, so prompts appear in this order.
    name, age, gender = (input(question) for question in questions)
    print(f"\nThank you, {name}. Now let's have a look at you and see how you're doing.\n")
    return name, age, gender

# Disease Identification
def identify_disease_group(ask=None):
    """Ask fourteen yes/no symptom questions and pick a disease group to test for.

    All questions are asked, in a fixed order, before any decision is made.
    Groups are then checked in priority order (heart disease, COVID-19,
    cancer, diabetes); the first group with at least one reported symptom wins.

    Args:
        ask: Optional callable taking the prompt text and returning the
            answer. Defaults to the built-in ``input``. Supplying it lets the
            triage logic be driven without reading from stdin.

    Returns:
        str | None: "heart_disease", "covid19", "cancer", "diabetes", or
        ``None`` when no symptom was reported.
    """
    if ask is None:
        ask = input

    questions = (
        ("Difficulty Breathing", "Are you experiencing difficulty breathing? (y/n): "),
        ("Shortness of Breath", "Are you experiencing shortness of breath? (y/n): "),
        ("Chest Pain", "Are you experiencing chest pain? (y/n): "),
        ("Cough", "Are you experiencing cough? (y/n): "),
        ("Fever", "Are you experiencing fever? (y/n): "),
        ("Loss of Taste", "Have you experienced loss of taste? (y/n): "),
        ("Loss of Smell", "Have you experienced loss of smell? (y/n): "),
        ("Weight Loss", "Have you experienced weight loss? (y/n): "),
        ("Lump", "Have you noticed a lump? (y/n): "),
        ("Unusual Bleeding", "Have you experienced unusual bleeding? (y/n): "),
        ("Persistent Cough", "Are you experiencing a persistent cough? (y/n): "),
        ("Frequent Urination", "Are you experiencing frequent urination? (y/n): "),
        ("Increased Thirst", "Are you experiencing increased thirst? (y/n): "),
        ("Unexplained Weight Loss", "Have you experienced unexplained weight loss? (y/n): "),
    )

    print("Let's go through some symptoms to help identify any health issues you may have.")

    # Answers are stripped as well as lower-cased so that "y ", " Y" or "yes"
    # are not silently treated as "no".
    reported = {
        symptom
        for symptom, prompt in questions
        if ask(prompt).strip().lower() in ('y', 'yes')
    }

    # Ordered by priority: the first group with any reported symptom is returned.
    groups = (
        ("heart_disease", ("Difficulty Breathing", "Shortness of Breath", "Chest Pain")),
        ("covid19", ("Cough", "Fever", "Loss of Taste", "Loss of Smell")),
        ("cancer", ("Weight Loss", "Lump", "Unusual Bleeding", "Persistent Cough")),
        ("diabetes", ("Frequent Urination", "Increased Thirst", "Unexplained Weight Loss")),
    )
    for group, indicators in groups:
        if reported.intersection(indicators):
            return group
    return None

# Prediction Functions
def predict_diabetes():
    """Prompt for eight readings and print the diabetes model's verdict.

    The values are passed, in prompt order, as a single-row array to
    ``models["diabetes"]``. The printed confidence is the probability of the
    predicted outcome. Any ``ValueError`` raised while reading or predicting
    is reported as invalid input.
    """
    print("Enter the following details:")
    # (prompt, converter) pairs in the column order handed to the model.
    fields = (
        ("Pregnancies: ", int),
        ("Glucose Level: ", float),
        ("Blood Pressure: ", float),
        ("Skin Thickness: ", float),
        ("Insulin Level: ", float),
        ("BMI (Body Mass Index): ", float),
        ("Diabetes Pedigree Function: ", float),
        ("Age: ", int),
    )
    try:
        readings = [convert(input(prompt)) for prompt, convert in fields]
        sample = np.array([readings])

        classifier = models["diabetes"]
        outcome = classifier.predict(sample)
        positive_probability = classifier.predict_proba(sample)[:, 1][0]

        if outcome[0] == 1:
            print(f"\nThe person is likely to have diabetes with a confidence of {positive_probability:.2%}.")
        else:
            print(f"\nThe person is unlikely to have diabetes with a confidence of {(1 - positive_probability):.2%}.")

    except ValueError:
        print("\nInvalid input! Please enter numerical values for all symptoms.")

def predict_covid():
    """Prompt for 1/0 answers and print the COVID-19 model's verdict.

    Twenty answers are collected from the user; six further features are
    fixed at 0. The values are arranged in the order given by the model's
    ``feature_names_in_`` before prediction. Any ``ValueError`` is reported
    as invalid input together with the error text.
    """
    print("Enter the following details (1 for Yes, 0 for No):")
    # (feature name, prompt) pairs, asked in this order.
    asked = (
        ("Fever", "Fever: "),
        ("Tiredness", "Tiredness: "),
        ("Dry-Cough", "Dry Cough: "),
        ("Difficulty-in-Breathing", "Difficulty in Breathing: "),
        ("Sore-Throat", "Sore Throat: "),
        ("Pains", "Pains: "),
        ("Nasal-Congestion", "Nasal Congestion: "),
        ("Runny-Nose", "Runny Nose: "),
        ("Diarrhea", "Diarrhea: "),
        ("Age_0-9", "Age 0-9 (1 for Yes, 0 for No): "),
        ("Age_10-19", "Age 10-19 (1 for Yes, 0 for No): "),
        ("Age_20-24", "Age 20-24 (1 for Yes, 0 for No): "),
        ("Age_25-59", "Age 25-59 (1 for Yes, 0 for No): "),
        ("Age_60+", "Age 60+ (1 for Yes, 0 for No): "),
        ("Gender_Female", "Gender Female (1 for Yes, 0 for No): "),
        ("Gender_Male", "Gender Male (1 for Yes, 0 for No): "),
        ("Gender_Transgender", "Gender Transgender (1 for Yes, 0 for No): "),
        ("Contact_Yes", "Contact with COVID-19 Positive Person (1 for Yes, 0 for No): "),
        ("Contact_No", "No Contact with COVID-19 Positive Person (1 for Yes, 0 for No): "),
        ("Contact_Dont-Know", "Uncertain Contact with COVID-19 Positive Person (1 for Yes, 0 for No): "),
    )
    # Features that are never asked and always sent as 0.
    always_zero = (
        "None_Sympton",
        "None_Experiencing",
        "Severity_Mild",
        "Severity_Moderate",
        "Severity_Severe",
        "Severity_None",
    )
    try:
        answers = {feature: int(input(prompt)) for feature, prompt in asked}
        answers.update(dict.fromkeys(always_zero, 0))

        classifier = models["covid19"]
        column_order = classifier.feature_names_in_
        row = [answers[column] for column in column_order]
        sample = pd.DataFrame([row], columns=column_order)

        outcome = classifier.predict(sample)
        positive_probability = classifier.predict_proba(sample)[:, 1][0]

        if outcome[0] == 1:
            print(f"\nThe person is likely to have COVID-19 with a confidence of {positive_probability:.2%}.")
        else:
            print(f"\nThe person is unlikely to have COVID-19 with a confidence of {(1 - positive_probability):.2%}.")

    except ValueError as e:
        print("\nInvalid input! Please enter 1 or 0 for all symptoms and details.")
        print(f"Error: {e}")

def predict_heart_disease():
    """Prompt for thirteen clinical readings and print the predicted heart-disease stage.

    The categorical answers (sex, cp, fbs, restecg, exang, slope, thal) are
    one-hot encoded and the resulting frame is aligned to the column names
    the model reports in ``feature_names_in_``. The predicted stage and the
    per-class probabilities are printed. Any ``ValueError`` is reported as
    invalid input together with the error text.
    """
    print("Enter the following details (provide numerical values as appropriate):")
    try:
        age = int(input("Age: "))
        sex = int(input("Sex (1 = Male, 0 = Female): "))
        cp = int(input("Chest Pain Type (0-3): "))
        trestbps = float(input("Resting Blood Pressure: "))
        chol = float(input("Serum Cholesterol (mg/dl): "))
        fbs = int(input("Fasting Blood Sugar > 120 mg/dl (1 = True, 0 = False): "))
        restecg = int(input("Resting ECG Results (0-2): "))
        thalach = float(input("Maximum Heart Rate Achieved: "))
        exang = int(input("Exercise Induced Angina (1 = Yes, 0 = No): "))
        oldpeak = float(input("ST Depression Induced by Exercise Relative to Rest: "))
        slope = int(input("Slope of the Peak Exercise ST Segment (0-2): "))
        ca = int(input("Number of Major Vessels Colored by Fluoroscopy (0-4): "))
        thal = int(input("Thalassemia (1 = Normal, 2 = Fixed Defect, 3 = Reversible Defect): "))

        input_data = {"age": [age], "sex": [sex], "cp": [cp], "trestbps": [trestbps], "chol": [chol], "fbs": [fbs], "restecg": [restecg], "thalach": [thalach], "exang": [exang], "oldpeak": [oldpeak], "slope": [slope], "ca": [ca], "thal": [thal]}

        features = pd.DataFrame(input_data)
        categorical_columns = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'thal']
        # drop_first must NOT be used here: with a single row every categorical
        # column has exactly one level, so drop_first=True removes every dummy
        # column and the user's categorical answers never reach the model.
        # NOTE(review): dummy names look like "cp_2"; this assumes the training
        # columns were produced from integer-coded categories -- confirm.
        features = pd.get_dummies(features, columns=categorical_columns, dtype=int)

        model = models["heart_disease"]
        # Keep exactly the columns the model reports, in its order. Dummy
        # columns the model does not know are dropped; levels not present in
        # this record are filled with 0.
        features = features.reindex(columns=model.feature_names_in_, fill_value=0)

        prediction = model.predict(features)
        probabilities = model.predict_proba(features)

        stages = {
            0: "No heart disease",
            1: "Mild heart disease (Stage 1)",
            2: "Moderate heart disease (Stage 2)",
            3: "Severe heart disease (Stage 3)",
            4: "Critical heart disease (Stage 4)"
        }

        predicted_stage = prediction[0]
        # An unexpected label is reported instead of raising an uncaught KeyError.
        stage_text = stages.get(predicted_stage, f"Unknown stage ({predicted_stage})")
        print(f"\nPredicted Stage: {stage_text}")
        print("Confidence for each stage:")
        # Label each probability with the model's own class value rather than
        # its position, so the output stays correct if a stage is absent from
        # the model's classes.
        for stage_label, prob in zip(model.classes_, probabilities[0]):
            print(f"Stage {stage_label}: {prob:.2%}")

    except ValueError as e:
        print("\nInvalid input! Please ensure numerical values are entered.")
        print(f"Error: {e}")

def predict_cancer():
    """Prompt for nine 1/0 symptom answers and print the predicted cancer type.

    The answers are passed, in prompt order, as a single-row array to
    ``models["cancer"]``. The numeric prediction is turned back into a label
    with the pickled label encoder, and the probability of every encoder
    class is printed. A ``ValueError`` is reported as invalid input.
    """
    print("Enter the following details (1 for Yes, 0 for No):")
    try:
        fatigue = int(input("Fatigue: "))
        weight_loss = int(input("Weight Loss: "))
        pain = int(input("Pain: "))
        lump = int(input("Lump: "))
        bleeding = int(input("Bleeding: "))
        cough = int(input("Cough: "))
        difficulty_swallowing = int(input("Difficulty Swallowing: "))
        skin_changes = int(input("Skin Changes: "))
        fever = int(input("Fever: "))

        features = np.array([[fatigue, weight_loss, pain, lump, bleeding, cough, difficulty_swallowing, skin_changes, fever]])
        prediction = models["cancer"].predict(features)
        probabilities = models["cancer"].predict_proba(features)

        # Load the encoder once per call; it was previously unpickled twice.
        label_encoder = joblib.load(r"C:\SUDHA\Personal portfolio project\Models\cancer_label_encoder.pkl")
        predicted_label = label_encoder.inverse_transform(prediction)[0]

        print(f"\nPredicted Cancer Type: {predicted_label}")
        print("Confidence for each type:")
        # NOTE(review): assumes the model's probability columns follow the
        # encoder's class order -- confirm against the training code.
        for class_label, class_probability in zip(label_encoder.classes_, probabilities[0]):
            print(f"{class_label}: {class_probability:.2%}")

    except ValueError:
        print("\nInvalid input! Please ensure numerical values (1 or 0) are entered.")

def main():
    """Run one chatbot session: greet, triage, predict, then suggest specialists.

    The disease group returned by ``identify_disease_group`` selects the
    introductory messages, the prediction routine and the specialist category.
    If no group is identified, the user is told to consult a specialist.
    """
    _name, _age, _gender = greet_user()
    disease_group = identify_disease_group()

    # disease group -> (messages to print, prediction routine, specialist category)
    follow_ups = {
        "diabetes": (
            (
                "\nBased on your symptoms, diabetes testing is recommended. Please get the following tests done to proceed:",
                "- Blood glucose test\n- HbA1c test\n- Oral glucose tolerance test\n",
                "Once you have the results, please provide the necessary details.",
            ),
            predict_diabetes,
            "Diabetes",
        ),
        "heart_disease": (
            (
                "\nBased on your symptoms, heart disease testing is recommended. Please get the following tests done to proceed:",
                "- Blood pressure test\n- Cholesterol levels\n- ECG\n- Stress test\n",
                "Once you have the results, please provide the necessary details.",
            ),
            predict_heart_disease,
            "Heart Disease",
        ),
        "covid19": (
            ("\nBased on your symptoms, COVID-19 testing is recommended.",),
            predict_covid,
            "COVID-19",
        ),
        "cancer": (
            ("\nBased on your symptoms, cancer testing is recommended.",),
            predict_cancer,
            "Cancer",
        ),
    }

    plan = follow_ups.get(disease_group)
    if plan is None:
        print("\nSymptoms unclear. Please consult a specialist.")
        return

    messages, run_prediction, specialist_category = plan
    for message in messages:
        print(message)
    run_prediction()
    recommend_specialists(specialist_category)

# Entry point: runs only when this module is executed directly, not on import.
if __name__ == "__main__":
    # Make sure the patients table exists before the session starts.
    initialize_database()
    main()




  specialists_df = pd.read_csv('C:\SUDHA\Personal portfolio project\datasets\specialists_database_extended.csv')


Welcome to the Medical Diagnosis Chatbot!
I'm here to assist you in identifying potential health concerns.


Thank you, s. Now let's have a look at you and see how you're doing.

Let's go through some symptoms to help identify any health issues you may have.

Based on your symptoms, diabetes testing is recommended. Please get the following tests done to proceed:
- Blood glucose test
- HbA1c test
- Oral glucose tolerance test

Once you have the results, please provide the necessary details.
Enter the following details:





The person is likely to have diabetes with a confidence of 60.00%.

Here are some specialists near you:

Name: Dr. Peter Black
Specialization: Endocrinologist
Address: 596 Health St, Tempe, AZ
Contact: (480) 710-8983
Yelp Ratings: 5.0
Additional Info: www.diabetes-tempe.com
----------------------------------------

Name: Dr. Peter Doe
Specialization: Endocrinologist
Address: 563 Health St, Phoenix, AZ
Contact: (480) 309-8052
Yelp Ratings: 5.0
Additional Info: www.diabetes-phoenix.com
----------------------------------------

Name: Dr. Peter Brown
Specialization: Endocrinologist
Address: 103 SkinCare St, Tempe, AZ
Contact: (480) 884-3121
Yelp Ratings: 5.0
Additional Info: www.diabetes-tempe.com
----------------------------------------

Name: Dr. Emily Green
Specialization: Endocrinologist
Address: 174 SkinCare St, Tempe, AZ
Contact: (480) 167-9034
Yelp Ratings: 4.9
Additional Info: www.diabetes-tempe.com
----------------------------------------

Name: Dr. John Johnson
Specialization: E