In [ ]:
# Install the libraries this notebook relies on:
#   transformers  - AutoTokenizer / AutoModelForCausalLM used to load the model
#   accelerate    - needed for the device_map="auto" placement used at load time
#   bitsandbytes  - needed for the 4-bit quantized load (load_in_4bit=True)
!pip install transformers accelerate bitsandbytes

In [ ]:
from huggingface_hub import login

# Authenticate this session with the Hugging Face Hub.
# This will prompt you to enter your Hugging Face token securely.
# NOTE(review): presumably needed because the model downloaded later in this
# notebook may be gated -- confirm on the model's Hub page.
login()

In [ ]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Hub identifier of the instruction-tuned checkpoint used by the chatbot.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

print(f"Loading tokenizer for {model_name}...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Tokenizer loaded.")

print(f"Loading model for {model_name}...")
# Quantization settings are passed through an explicit BitsAndBytesConfig:
# handing `load_in_4bit=True` straight to `from_pretrained` is deprecated.
# The compute dtype is set to float16 so the 4-bit layers run their matmuls in
# the same dtype as the rest of the model instead of the float32 default.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,  # 4-bit weights keep the 7B model within a small GPU's memory
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",  # let accelerate decide where each layer lives
    torch_dtype=torch.float16,  # dtype for the non-quantized parts of the model
)
print("Model loaded.")

# Set the model to evaluation mode (disables dropout and similar train-only behavior).
model.eval()

In [ ]:
# Persona and guardrails prepended to every user query.
_SYSTEM_MESSAGE = (
    "You are a helpful and friendly medical assistant. "
    "You provide general health information and explanations, "
    "but **NEVER give specific medical advice, diagnoses, or prescribe treatments.** "
    "Always advise users to consult a qualified healthcare professional for personalized medical advice. "
    "Keep your answers clear, concise, and easy to understand. "
    "If a question asks for specific medical advice or a diagnosis, politely state that you cannot provide that and recommend seeing a doctor."
)

# Lowercase phrases that mark a request for a diagnosis, prescription or
# treatment plan. These are basic rule-based filters; robust safety needs more
# (content moderation APIs, safety fine-tuning, ...).
_BLOCKED_PHRASES = (
    "diagnose",
    "prescribe",
    "treatment for",
    "should i take",
    "what drug",
    "am i having",
    "is it cancer",
)

# Canned reply returned when the rule-based filter rejects a query.
_REFUSAL_MESSAGE = (
    "I am a medical assistant and cannot provide medical advice, diagnoses, or prescribe treatments. "
    "Please consult a qualified healthcare professional for personalized medical guidance."
)

# Appended to any model answer that does not already point the user to a professional.
_DISCLAIMER = (
    "\n\n**Disclaimer:** This information is for general knowledge and informational purposes only, "
    "and does not constitute medical advice. Please consult a qualified healthcare professional "
    "for any health concerns or before making any decisions related to your health or treatment."
)


def get_health_response(query):
    """Answer a general health question, refusing requests for medical advice.

    The query is first checked against a small list of blocked phrases; a match
    returns a fixed refusal without calling the model. Otherwise the query is
    wrapped in the Mistral instruction format together with the system
    message, a completion is sampled from the global ``model``, and a
    disclaimer is appended unless the answer already recommends consulting a
    professional.

    Args:
        query: The user's question as a string.

    Returns:
        The refusal message, or the generated answer (with disclaimer when
        needed), stripped of surrounding whitespace.
    """
    # --- Safety filter (rule-based) ---
    # Run before any prompt/tokenizer work so rejected queries cost nothing.
    # Whitespace runs are collapsed so "should   I\ttake" cannot slip past the
    # substring match.
    normalized_query = " ".join(query.lower().split())
    if any(phrase in normalized_query for phrase in _BLOCKED_PHRASES):
        return _REFUSAL_MESSAGE

    # --- Prompt construction ---
    # Mistral's instruction format: the system text and the user query share a
    # single [INST] ... [/INST] turn.
    prompt = f"<s>[INST] {_SYSTEM_MESSAGE}\n\n{query} [/INST]"

    # --- Generation ---
    # The prompt already starts with the literal BOS token "<s>", so the
    # tokenizer must not add its own special tokens; otherwise the sequence
    # would begin with two BOS tokens.
    # NOTE(review): truncation presumably drops tokens from the end, which
    # would cut the closing [/INST] on very long queries -- confirm the
    # tokenizer's truncation side if long inputs matter.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=2048,
        add_special_tokens=False,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():  # inference only: no gradients needed
        outputs = model.generate(
            **inputs,
            max_new_tokens=500,  # upper bound on the length of the answer
            temperature=0.7,     # lower = more deterministic, higher = more varied
            do_sample=True,      # sample instead of greedy decoding
            top_k=50,            # restrict sampling to the 50 most likely tokens
            top_p=0.95,          # nucleus sampling threshold
            pad_token_id=tokenizer.eos_token_id,  # pad with the EOS token id
        )

    # generate() returns prompt + completion; keep only the newly generated tokens.
    response_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # --- Post-processing ---
    if "Please consult a qualified healthcare professional" not in response_text:
        response_text += _DISCLAIMER

    return response_text.strip()

In [ ]:
# Interactive console loop: read a question, print the chatbot's answer,
# repeat until the user types 'quit' (or input ends / is interrupted).
print("Welcome to the General Health Chatbot! (Type 'quit' to exit)")

while True:
    try:
        # Trim surrounding whitespace so " quit " and blank lines are handled cleanly.
        user_input = input("\nYou: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the prompt was interrupted: exit without a traceback.
        print("\nChatbot: Goodbye!")
        break

    if not user_input:
        # Nothing to answer; prompt again instead of sending an empty query to the model.
        continue

    if user_input.lower() == 'quit':
        print("Chatbot: Goodbye!")
        break

    # flush=True makes the status text visible before the slow generation
    # call starts rather than after it returns.
    print("Chatbot (thinking...): ", end="", flush=True)
    response = get_health_response(user_input)
    print(response)