# AI-powered chatbot for initial patient screening.
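This notebook builds a healthcare chatbot that analyzes patient-reported symptoms for initial patient screening. It loads a 4-bit quantized `ruslanmv/Medical-Llama3-8B` model, runs a console-based screening flow, and then serves the same flow through a Gradio web interface. The chatbot's output is a suggestion only and does not replace consultation with a healthcare professional.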



In [8]:

!pip install transformers bitsandbytes accelerate gradio


Collecting bitsandbytes
  Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Collecting gradio
  Downloading gradio-5.9.0-py3-none-any.whl.metadata (16 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.19-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)
  Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 

In [9]:

import torch
# Report whether a CUDA device is visible to PyTorch, and name it when there is one.
cuda_ready = torch.cuda.is_available()
print("GPU available:", cuda_ready)
if not cuda_ready:
    print("No GPU detected. Switch to a GPU runtime in Colab.")
else:
    print("Using GPU:", torch.cuda.get_device_name(0))


GPU available: True
Using GPU: Tesla T4


In [10]:
!pip install bitsandbytes




In [11]:
!pip uninstall -y bitsandbytes

!pip install bitsandbytes==0.45.0


Found existing installation: bitsandbytes 0.45.0
Uninstalling bitsandbytes-0.45.0:
  Successfully uninstalled bitsandbytes-0.45.0
Collecting bitsandbytes==0.45.0
  Using cached bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Using cached bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.0


In [12]:
import bitsandbytes

# Confirm which bitsandbytes version the kernel actually imports.
print(f"BitsAndBytes version: {bitsandbytes.__version__}")


BitsAndBytes version: 0.45.0


In [13]:

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Model setup
model_name = "ruslanmv/Medical-Llama3-8B"
device_map = 'auto'  # let the loader decide where to place the weights

# 4-bit NF4 quantization with float16 compute to reduce the memory needed for the weights.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True  # nested quantization: also quantizes the quantization constants
)

# Load the model and tokenizer.
# use_cache=True keeps the key/value cache on: this notebook only runs inference, and with
# the cache disabled generate() re-processes the whole sequence for every new token.
# NOTE(review): trust_remote_code=True allows code shipped in the hub repository to run
# during loading - confirm it is actually required for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True,
    use_cache=True,
    device_map=device_map
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the end-of-sequence token for padding so padding=True and pad_token_id work downstream.
tokenizer.pad_token = tokenizer.eos_token

print("Model and tokenizer loaded successfully!")


config.json:   0%|          | 0.00/755 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Model and tokenizer loaded successfully!


In [14]:

def askme(question, max_new_tokens=150):
    """Ask the medical assistant model a question and return its reply.

    Relies on the notebook-level ``model`` and ``tokenizer`` objects.

    Args:
        question: The user's question, inserted into the prompt after ``User:``.
        max_new_tokens: Upper bound on the number of tokens to generate.
            Defaults to 150.

    Returns:
        The generated reply with surrounding whitespace stripped.
    """
    sys_message = '''
    You are an AI Medical Assistant trained on a vast dataset of health information. Please be thorough and
    provide an informative answer. If you don't know the answer to a specific medical inquiry, advise seeking professional help.
    '''

    # Construct the prompt
    prompt = f"{sys_message}\n\nUser: {question}\n\nAssistant:"

    # Tokenize and move the tensors to wherever the model lives, rather than assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)
    prompt_length = len(inputs["input_ids"][0])

    # Generate response
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.pad_token_id,
    )

    # The output sequence starts with the prompt tokens. Decode only what comes after them,
    # so a reply that itself contains "Assistant:" is returned whole instead of being cut
    # at its last occurrence.
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()


In [15]:

def initial_screening():
    """Run a console screening session.

    Prints a welcome line, reads age, gender, symptoms and symptom duration
    from standard input, sends them to ``askme`` as a single question, and
    prints the reply. Returns nothing.
    """
    print("Welcome to the initial patient screening tool.")

    # The four intake questions, asked in this order.
    intake_prompts = (
        "Please enter your age: ",
        "Please specify your gender (male/female/other): ",
        "Describe your primary symptoms (e.g., cough, fever, fatigue): ",
        "How long have you been experiencing these symptoms? (e.g., 3 days, 2 weeks): ",
    )
    age, gender, symptoms, duration = [input(text) for text in intake_prompts]

    # Fold the answers into one natural-language question for the model.
    question = (
        f"I am a {age}-year-old {gender} experiencing {symptoms} for {duration}. "
        "What could be the possible causes and next steps?"
    )

    print("\nAI's Response:\n", askme(question))

# Uncomment to run the tool
# initial_screening()


In [16]:

# Initial screening function
def initial_screening():
    """Run a console screening session with a screening-specific system prompt.

    Prints a welcome line, reads age, gender, symptoms and symptom duration
    from standard input (surrounding whitespace is stripped), queries the
    notebook-level ``model`` / ``tokenizer`` and prints the reply. Returns
    nothing.

    Note: this redefines the ``initial_screening`` declared earlier in the
    notebook; once this cell has run, this version is the one that is called.
    """
    sys_message = '''
    You are an AI Medical Assistant designed to perform initial patient screening.
    Please ask the user for their details, including age, gender, symptoms, and duration of symptoms.
    Based on this, suggest potential conditions or advise whether the patient should consult a healthcare professional.
    '''

    print("Welcome to the initial patient screening tool.")

    # Collect patient details; strip stray whitespace so it does not end up inside the prompt.
    age = input("Please enter your age: ").strip()
    gender = input("Please specify your gender (male/female/other): ").strip()
    symptoms = input("Describe your primary symptoms (e.g., cough, fever, fatigue): ").strip()
    duration = input("How long have you been experiencing these symptoms? (e.g., 3 days, 2 weeks): ").strip()

    # Combine patient details into a structured prompt
    question = f"I am a {age}-year-old {gender} experiencing {symptoms} for {duration}. Could you provide potential causes and next steps?"

    # Generate AI response
    def askme(question):
        """Send ``question`` to the model under ``sys_message`` and return the generated reply."""
        prompt = f"{sys_message}\n\nUser: {question}\n\nAssistant:"
        # Move the inputs to the model's own device instead of assuming "cuda".
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)
        prompt_length = len(inputs["input_ids"][0])
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=150,
            pad_token_id=tokenizer.pad_token_id,
        )
        # Decode only the tokens after the prompt, so a reply that contains "Assistant:"
        # itself is not cut at its last occurrence.
        return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    # Get and display the AI's response
    response = askme(question)
    print("\nAI's Response:\n", response)

# Run the screening tool. This call blocks on input() until all four answers
# (age, gender, symptoms, duration) have been entered.
initial_screening()

Welcome to the initial patient screening tool.


Please enter your age:  25
Please specify your gender (male/female/other):  female
Describe your primary symptoms (e.g., cough, fever, fatigue):  stomach pain , fever, coughing , chest pain 
How long have you been experiencing these symptoms? (e.g., 3 days, 2 weeks):  2 days



AI's Response:
 Sure! Based on your symptoms, here are some possible conditions:

    1. Stomach flu
    2. Food poisoning
    3. Appendicitis
    4. Pneumonia

    Please note that these are just suggestions and may not apply to everyone. It's always best to consult a healthcare professional for proper diagnosis and treatment.


In [18]:
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Pick the device: CUDA when PyTorch can see a GPU, otherwise CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"Device: {device}")

# Hub identifier of the checkpoint served by the UI
model_name = "ruslanmv/Medical-Llama3-8B"

# Load the quantized model and its tokenizer; report any failure and re-raise it
try:
    # 4-bit NF4 quantization with float16 compute and nested (double) quantization
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )

    model_load_options = {
        "quantization_config": bnb_config,
        "device_map": "auto",
        "trust_remote_code": True,
    }
    model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_options)

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    # Pad with the end-of-sequence token
    tokenizer.pad_token = tokenizer.eos_token
    print("Model and tokenizer loaded successfully!")
except Exception as load_error:
    print(f"Error loading model or tokenizer: {load_error}")
    raise

# Patient screening function
def initial_screening_ui(age, gender, symptoms, duration):
    """Screen a patient from the Gradio form fields and return a Markdown report.

    Relies on the notebook-level ``model`` and ``tokenizer`` objects.

    Args:
        age: Patient age in years; any value ``float()`` accepts. Must be
            between 0 and 130 inclusive. 0 is accepted so infants can be screened.
        gender: Selected gender; ``None`` or blank counts as missing.
        symptoms: Free-text symptom description; blank counts as missing.
        duration: Free-text symptom duration; blank counts as missing.

    Returns:
        A Markdown string with the patient details and the model's reply,
        a fixed "please fill in all the fields" message when validation
        fails, or ``"An error occurred: ..."`` if generation raises.
    """
    invalid_input_message = "Please fill in all the fields with valid information."
    max_age_years = 130  # sanity bound; anything above is treated as a typo

    try:
        # Validate the age numerically instead of by truthiness, so 0 is accepted and
        # negative or non-numeric values are rejected.
        try:
            age_value = float(age)
        except (TypeError, ValueError):
            return invalid_input_message
        # NaN fails both comparisons and is rejected here as well.
        if not 0 <= age_value <= max_age_years:
            return invalid_input_message
        # Render whole-number ages as "25" rather than "25.0".
        age = int(age_value) if age_value.is_integer() else age_value

        # Treat None and whitespace-only text as missing.
        gender = str(gender or "").strip()
        symptoms = str(symptoms or "").strip()
        duration = str(duration or "").strip()
        if not (gender and symptoms and duration):
            return invalid_input_message

        # System message
        sys_message = '''
        You are an AI Medical Assistant designed to perform initial patient screening.
        Please ask the user for their details, including age, gender, symptoms, and duration of symptoms.
        Based on this, suggest potential conditions or advise whether the patient should consult a healthcare professional.
        '''

        # Combine patient details into a structured question
        question = f"I am a {age}-year-old {gender} experiencing {symptoms} for {duration}. Could you provide potential causes and next steps?"

        # Construct the prompt and move the inputs to the model's own device
        prompt = f"{sys_message}\n\nUser: {question}\n\nAssistant:"
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)
        prompt_length = len(inputs["input_ids"][0])

        # Generate AI response
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=150,
            pad_token_id=tokenizer.pad_token_id
        )
        # Decode only the tokens after the prompt, so a reply that contains "Assistant:"
        # itself is not cut at its last occurrence.
        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

        # Format the response
        return f"### Patient Details\n- **Age**: {age}\n- **Gender**: {gender}\n- **Symptoms**: {symptoms}\n- **Duration**: {duration}\n\n### AI Screening Results\n{response}"
    except Exception as e:
        # Show the failure in the UI rather than letting the request crash.
        return f"An error occurred: {e}"

# Custom CSS handed to gr.Interface via css=custom_css. The `button` selector is
# unscoped, so the pink background, white bold text and rounded corners (and the
# lighter pink on hover) apply to every <button> element the stylesheet reaches.
custom_css = """
/* Custom pink buttons */
button {
    background-color: #ff69b4 !important;
    color: white !important;
    font-weight: bold !important;
    border-radius: 8px !important;
}
button:hover {
    background-color: #ff85c1 !important;
}
"""

# Input widgets, created in the positional order initial_screening_ui expects:
# age, gender, symptoms, duration.
age_input = gr.Number(label="Enter your age")
gender_input = gr.Dropdown(choices=["male", "female", "other"], label="Select your gender")
symptoms_input = gr.Textbox(
    lines=3,
    placeholder="Describe your primary symptoms (e.g., cough, fever, fatigue)",
    label="Describe your symptoms",
)
duration_input = gr.Textbox(
    lines=1,
    placeholder="How long have you been experiencing these symptoms? (e.g., 3 days, 2 weeks)",
    label="Duration of symptoms",
)

# The screening function returns Markdown, so render its result as Markdown.
results_output = gr.Markdown(label="Screening Results")

# Wire the widgets to the screening function.
interface = gr.Interface(
    fn=initial_screening_ui,
    inputs=[age_input, gender_input, symptoms_input, duration_input],
    outputs=results_output,
    title="AI Medical Assistant - Initial Patient Screening",
    description="Provide your age, gender, symptoms, and duration of symptoms, and the AI Medical Assistant will analyze the information to provide possible causes and actionable next steps. For critical or specific issues, always consult a healthcare professional.",
    css=custom_css,
)

# Start the app with a public share link when this code runs as the main module.
if __name__ == "__main__":
    interface.launch(share=True)


Device: cuda


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Model and tokenizer loaded successfully!
* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://5a75dd96a7cb717edf.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
