In [5]:
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Path to your local Phil-1.5 model
# NOTE(review): absolute Windows path, so this only runs on a machine with
# that directory; "Phil-1.5" may be meant as "Phi-1.5" — confirm before
# renaming anything.
MODEL_PATH = r"D:\ChatBot\phil-1.5"

# Load model and tokenizer
# Both are created once at module level and reused by generate_response().
# trust_remote_code=True allows transformers to execute Python code shipped
# inside the model directory, so MODEL_PATH must contain trusted files only.
# use_safetensors=True asks for weights stored in the .safetensors format.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, trust_remote_code=True, use_safetensors=True)

# Core response generator

# Markers after which the model has typically started inventing a new
# speaker turn or a new section; everything from the first one on is dropped.
_STOP_MARKERS = ("User:", "Patient:", "Q:", "###", "\n\n")

# Returned instead of running the model when there is nothing to answer.
_EMPTY_INPUT_MESSAGE = "Please describe your symptom or health concern."


def _truncate_at_stop_markers(text):
    """Return *text* cut just before the earliest stop marker, whitespace-trimmed."""
    # Trim first: a completion that begins with blank lines would otherwise be
    # cut to an empty string by the "\n\n" marker.
    text = text.strip()
    for marker in _STOP_MARKERS:
        text = text.partition(marker)[0]
    return text.strip()


def generate_response(user_input):
    """Generate the assistant's reply to a single health question.

    Args:
        user_input: Free-text description of the user's concern. ``None``,
            non-string values, and blank strings are treated as "no input".

    Returns:
        The model's reply as a stripped string, truncated at the first
        hallucinated speaker switch. If there is no input, a short message
        asking the user to describe their concern. If tokenization or
        generation fails, a user-facing error string (this function does not
        raise).
    """
    question = user_input.strip() if isinstance(user_input, str) else ""
    if not question:
        # Skip the (slow) generation entirely when there is nothing to answer.
        return _EMPTY_INPUT_MESSAGE

    try:
        prompt = (
            "You are a knowledgeable and empathetic AI health assistant.\n"
            "Your task is to clearly explain what may be causing the user's symptom, suggest basic home care tips, "
            "and advise when it's necessary to see a doctor. Always respond in a supportive tone. "
            "Never ask the user questions. Do not include rhetorical questions. "
            "Give a clear, complete answer using supportive and informative language. "
            "NEVER include any of the following phrases: 'Have you', 'Are you', 'Could it be', 'Is it possible that'.\n\n"
            f"User: {question}\n"
            "Assistant:"
        )

        # Calling the tokenizer (rather than .encode) also yields the
        # attention mask; passing it explicitly matters because the pad token
        # is set to EOS below, so generate() cannot infer the mask reliably.
        encoded = tokenizer(prompt, return_tensors="pt")
        input_ids = encoded["input_ids"]
        output_ids = model.generate(
            input_ids,
            attention_mask=encoded.get("attention_mask"),
            max_new_tokens=350,
            temperature=0.6,              # Slightly reduced for more focused output
            top_k=50,
            top_p=0.9,
            do_sample=True,
            repetition_penalty=1.2,      # Helps reduce repeated or vague replies
            pad_token_id=tokenizer.eos_token_id,
        )

        # The output contains the prompt followed by the completion; keep
        # only the newly generated tokens.
        new_tokens = output_ids[0][input_ids.shape[-1]:]
        generated = tokenizer.decode(new_tokens, skip_special_tokens=True)

        # Cut off at hallucinated speaker switches
        return _truncate_at_stop_markers(generated)

    except Exception as e:
        # UI boundary: report the failure in the output box instead of
        # letting the exception propagate to the caller.
        return f"⚠️ Sorry, something went wrong while generating a response. Error: {e}"

# Gradio interface logic
def health_bot_interface(user_input):
    """Gradio callback: pass the textbox contents to ``generate_response``.

    Args:
        user_input: The value of the "Your Health Concern" textbox.

    Returns:
        Whatever ``generate_response`` returns for that input, shown in the
        output textbox.
    """
    reply = generate_response(user_input)
    return reply

# Run the Gradio app
if __name__ == "__main__":
    demo = gr.Interface(
        fn=health_bot_interface,
        inputs=gr.Textbox(
            lines=4,
            label="Your Health Concern",
            placeholder="Example: I've had a sore throat and mild fever for 3 days."
        ),
        outputs=gr.Textbox(
            lines=8,
            label="AI Health Assistant's Response"
        ),
        title="🩺 AI Health Assistant (Phil-1.5)",
        description=(
            "Get helpful, clear guidance on your health-related questions. This assistant is powered by a locally hosted AI model (Phil-1.5).\n\n"
            "⚠️ Note: This tool is for educational use only. Always consult a qualified medical professional for real health concerns."
        ),
        theme="soft",
        allow_flagging="never",
        # live=True would re-run the callback on every change to the textbox.
        # Each run samples up to 350 new tokens from the local model, so that
        # queues a slow generation per keystroke and the answer keeps changing
        # while the user is still typing. Generate only on Submit instead.
        live=False,
    )
    # share=True publishes the app through a temporary public gradio.live URL:
    # anyone with the link can send prompts to the model on this machine.
    demo.launch(share=True)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Running on local URL:  http://127.0.0.1:7862
Running on public URL: https://082361d87a9b410fd4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
