In [1]:
# Cell 1: Import Libraries

import PyPDF2
import spacy
import sqlite3



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Gaurav Gupta\anaconda\Lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "C:\Users\Gaurav Gupta\anaconda\Lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
    app.start()
  File "C:\Users\Gaurav Gupta\anaconda\Lib\site-packages\ipykernel\kernelapp.py", line 736, in start
    self.io_loop.start()
  File "C:\Users\Gaurav Gupta

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [None]:
# Cell 2: Text Extraction Functions
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        A single string made of each page's extracted text, with no separator
        inserted between pages. Pages that yield no text contribute nothing.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # `or ""` guards against a falsy extraction result (None or '') so one
        # unreadable page cannot abort the whole document with a TypeError.
        # join() also avoids rebuilding the string once per page.
        return "".join(page.extract_text() or "" for page in reader.pages)

def extract_text_from_txt(file_path, encoding='utf-8'):
    """Read a plain-text file and return its full contents.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The decoded contents of the file as one string.

    Raises:
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    # An explicit encoding keeps results identical across operating systems;
    # without it, open() falls back to the locale's preferred encoding.
    with open(file_path, 'r', encoding=encoding) as file:
        return file.read()


In [None]:
# Cell 3: Symptom Extraction Function
# Load the small English spaCy pipeline once; extract_symptoms_from_text
# calls this shared `nlp` object for every report it processes.
nlp = spacy.load("en_core_web_sm")

# Lookup table: lowercase symptom phrase -> symptom ID string.
# The IDs are the same strings stored in Symptom_Disease_Mapping.Symptom_ID.
known_symptoms = dict([
    ('fatigue', 'S001'),
    ('chest pain', 'S002'),
    ('cough', 'S003'),
    ('shortness of breath', 'S004'),
])

def extract_symptoms_from_text(text):
    """Find known symptoms mentioned in free text and return their IDs.

    The text is lower-cased and run through the module-level ``nlp`` pipeline.
    Every phrase in ``known_symptoms`` is then compared against runs of
    consecutive tokens, so multi-word entries such as 'chest pain' are found
    as well as single-word ones. A word matches a token when it equals either
    the token's lemma or its surface text.

    Args:
        text: Report text to scan. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of symptom IDs without duplicates, ordered by where each
        symptom first appears in the text.
    """
    if not text:
        return []

    doc = nlp(text.lower())
    # Keep both forms per token: the lemma catches inflections ("pains"),
    # the surface text is a fallback if the lemmatizer alters a phrase word.
    token_forms = [(token.lemma_.lower(), token.text.lower()) for token in doc]

    # Pre-split each phrase once; an empty key would match everywhere, so skip it.
    phrases = [
        (phrase.split(), symptom_id)
        for phrase, symptom_id in known_symptoms.items()
        if phrase.split()
    ]

    extracted_symptoms = []
    for start in range(len(token_forms)):
        for words, symptom_id in phrases:
            if symptom_id in extracted_symptoms:
                continue
            window = token_forms[start:start + len(words)]
            # A short window means the phrase would run past the end of the text.
            if len(window) == len(words) and all(
                word in forms for word, forms in zip(words, window)
            ):
                extracted_symptoms.append(symptom_id)

    return extracted_symptoms


In [None]:
# Cell 4: Database Setup
def setup_database(db_path='medical_data.db'):
    """Create the schema and seed the sample rows, without duplicating them.

    Creates the ``Disease`` and ``Symptom_Disease_Mapping`` tables if they are
    missing, then inserts the sample diseases and symptom mappings. Rows that
    already exist are skipped, so calling this repeatedly leaves the same data.

    Args:
        db_path: SQLite database file to create or update. Defaults to
            'medical_data.db' in the current working directory.
    """
    # Disease IDs are written explicitly because the mappings below refer to
    # them by number; relying on auto-assigned IDs breaks on a second run.
    sample_diseases = [
        (1, 'Hypertension', 'A condition in which the blood pressure in the arteries is elevated.'),
        (2, 'Influenza', 'A viral infection that attacks the respiratory system.'),
    ]
    sample_mappings = [
        ('S001', 1, 0.75),
        ('S002', 1, 0.85),
        ('S003', 2, 0.90),
    ]

    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits on success and rolls back if any statement fails.
        with conn:
            conn.execute('''CREATE TABLE IF NOT EXISTS Disease (
                            Disease_ID INTEGER PRIMARY KEY,
                            Disease_Name TEXT,
                            Description TEXT
                        )''')

            conn.execute('''CREATE TABLE IF NOT EXISTS Symptom_Disease_Mapping (
                            Symptom_ID TEXT,
                            Disease_ID INTEGER,
                            Probability REAL,
                            FOREIGN KEY (Disease_ID) REFERENCES Disease(Disease_ID)
                        )''')

            # The primary key makes a repeated insert a conflict, which is ignored.
            conn.executemany(
                "INSERT OR IGNORE INTO Disease (Disease_ID, Disease_Name, Description) VALUES (?, ?, ?)",
                sample_diseases,
            )

            # The mapping table has no unique key, so check for the pair explicitly.
            conn.executemany(
                '''INSERT INTO Symptom_Disease_Mapping (Symptom_ID, Disease_ID, Probability)
                   SELECT ?, ?, ?
                   WHERE NOT EXISTS (
                       SELECT 1 FROM Symptom_Disease_Mapping
                       WHERE Symptom_ID = ? AND Disease_ID = ?
                   )''',
                [(symptom_id, disease_id, probability, symptom_id, disease_id)
                 for symptom_id, disease_id, probability in sample_mappings],
            )
    finally:
        # Release the file handle even when a statement above raised.
        conn.close()

setup_database()  # Safe to re-run: rows that already exist are skipped


In [None]:
def get_possible_diseases(symptom_ids, db_path='medical_data.db'):
    """Look up diseases linked to the given symptom IDs, print a report, and return the rows.

    Args:
        symptom_ids: Symptom ID strings (for example ``['S001', 'S002']``).
            A single string is treated as one ID. Surrounding whitespace is
            stripped and blank entries are dropped before querying.
        db_path: SQLite database file to query. Defaults to 'medical_data.db'.

    Returns:
        A list of ``(Disease_Name, Description, Probability)`` tuples ordered
        by probability, highest first. The list is empty when nothing matches.
        A disease appears once per matching symptom mapping.
    """
    if isinstance(symptom_ids, str):
        # Iterating a bare string would split it into single characters.
        symptom_ids = [symptom_ids]

    cleaned_ids = [
        str(symptom_id).strip()
        for symptom_id in (symptom_ids or [])
        if symptom_id is not None and str(symptom_id).strip()
    ]

    results = []
    if cleaned_ids:
        # Only the placeholder count is interpolated; the IDs themselves are bound.
        placeholders = ','.join('?' * len(cleaned_ids))
        query = f'''
            SELECT d.Disease_Name, d.Description, s.Probability
            FROM Symptom_Disease_Mapping s
            JOIN Disease d ON s.Disease_ID = d.Disease_ID
            WHERE s.Symptom_ID IN ({placeholders})
            ORDER BY s.Probability DESC
        '''
        conn = sqlite3.connect(db_path)
        try:
            results = conn.execute(query, cleaned_ids).fetchall()
        finally:
            # Close the connection even if the query raised.
            conn.close()

    if results:
        print("Based on the symptoms you’ve provided, here are the possible conditions we found:\n")
        for disease_name, description, probability in results:
            # round() rather than int(): 0.29 * 100 is 28.999... and must print as 29.
            percent = round(probability * 100)
            if probability > 0.8:
                print(f"It’s highly likely that you may have *{disease_name}*.")
                print(f"Description: {description}")
                print(f"With a {percent}% probability, I recommend consulting a healthcare provider to confirm this diagnosis.\n")
            elif 0.5 <= probability <= 0.8:
                print(f"You may have *{disease_name}*, with a probability of {percent}%.")
                print(f"Description: {description}. However, further symptoms or tests might be needed to verify this condition.\n")
            else:
                print(f"There’s a slight chance that you could have *{disease_name}* (Probability: {percent}%).")
                print(f"This condition is less likely based on the symptoms you’ve given, but keep monitoring and consult your doctor if symptoms persist.\n")
    else:
        print("I'm sorry, but I couldn’t find any matching diseases for the symptoms you’ve provided. Consider adding more details or consulting a healthcare professional.")

    return results


In [None]:
import ipywidgets as widgets
from IPython.display import display

In [None]:
# Handle uploaded file and communicate
def handle_uploaded_file(change):
    """Observer for the upload widget: extract symptoms from the file and report diseases.

    Args:
        change: The change notification passed by ``uploader.observe``; its
            ``'new'`` entry holds the widget's current value. If it carries no
            such entry, the global ``uploader.value`` is read instead.

    The disease report, including the "nothing matched" message, is printed by
    ``get_possible_diseases`` itself, so nothing further is printed here.
    """
    try:
        upload_value = change['new']
    except (TypeError, KeyError):
        upload_value = uploader.value

    raw_bytes = _first_uploaded_content(upload_value)
    if raw_bytes is None:
        return  # value was cleared or nothing has been uploaded yet

    report_text = _uploaded_bytes_to_text(raw_bytes)
    symptoms = extract_symptoms_from_text(report_text)

    if symptoms:
        print("Symptoms extracted from report:", symptoms)
        get_possible_diseases(symptoms)
    else:
        print("No symptoms detected in the report. Please provide more detailed information.")


def _first_uploaded_content(upload_value):
    """Return the raw bytes of the first uploaded file, or None if there is none."""
    if not upload_value:
        return None
    # ipywidgets 7 exposes a dict keyed by filename; ipywidgets 8 exposes a
    # tuple of per-file dicts. Both keep the payload under 'content'.
    if isinstance(upload_value, dict):
        entries = list(upload_value.values())
    else:
        entries = list(upload_value)
    # bytes() normalizes the payload, which is a memoryview under ipywidgets 8.
    return bytes(entries[0]['content'])


def _uploaded_bytes_to_text(raw_bytes):
    """Convert an uploaded file's bytes to text, parsing PDFs and decoding anything else as UTF-8."""
    if raw_bytes[:5] == b'%PDF-':
        # The widget accepts .pdf uploads; those are binary and must be parsed,
        # not decoded. PdfReader can read from an in-memory stream.
        import io
        reader = PyPDF2.PdfReader(io.BytesIO(raw_bytes))
        return "".join(page.extract_text() or "" for page in reader.pages)
    # errors='replace' keeps a stray non-UTF-8 byte from aborting the callback.
    return raw_bytes.decode('utf-8', errors='replace')



In [None]:
# Cell 8: Main Function
def main():
    """Run the interactive entry point of the diagnosis notebook.

    Prompts for a mode. Mode '1' reads comma-separated symptom IDs and prints
    the matching diseases. Mode '2' displays a file-upload widget whose
    uploads are processed by ``handle_uploaded_file``. Any other answer prints
    an error message.
    """
    # The widget is stored globally because handle_uploaded_file may read it.
    global uploader

    print("Welcome to the Medical Diagnosis System")

    option = input("Enter '1' for manual input of symptoms, '2' to upload a health checkup report: ").strip()

    if option == '1':
        raw_ids = input("Enter symptom IDs separated by commas (e.g., S001,S002): ")
        # Strip each entry and drop blanks so "S001, S002" matches the stored IDs.
        symptoms_input = [part.strip() for part in raw_ids.split(',') if part.strip()]
        get_possible_diseases(symptoms_input)

    elif option == '2':
        uploader = widgets.FileUpload(
            accept='.txt,.pdf',  # Acceptable file types
            multiple=False  # Disable multiple file uploads
        )

        # Attach the observer before showing the widget so no upload is missed.
        uploader.observe(handle_uploaded_file, names='value')
        display(uploader)

    else:
        print("Invalid option. Please enter '1' or '2'.")

# Starts the interactive flow; this blocks on input() until a mode is entered.
main()  # Call the main function to run the application
