In [None]:
!pip install pandas

# MedAlpaca LLM

- Requires more than 15 GB of GPU RAM
- Device used: L4 GPU

In [None]:
!huggingface-cli login

In [None]:
!pip install -q transformers accelerate bitsandbytes gradio

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Load the MedAlpaca tokenizer and weights (half precision).
model_id = "medalpaca/medalpaca-7b"
tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
# Use the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    model = model.to("cuda")
else:
    model = model.to("cpu")
model.eval()

# Chat function with history
def chat_interface(message, history):
    """Generate MedAlpaca's reply to ``message`` in the context of ``history``.

    Args:
        message: The latest user message.
        history: Earlier turns, iterated as ``(user_msg, bot_msg)`` pairs.
            NOTE(review): assumes Gradio passes tuple-style history; confirm
            against the installed Gradio version.

    Returns:
        Only the newly generated reply text, with surrounding whitespace
        stripped.
    """
    # Rebuild the conversation in the "### Instruction / ### Response" layout.
    turns = [
        f"### Instruction:\n{user_msg}\n\n### Response:\n{bot_msg}\n\n"
        for user_msg, bot_msg in history
    ]
    turns.append(f"### Instruction:\n{message}\n\n### Response:\n")
    prompt = "".join(turns)

    # Tokenize and remember the prompt length so it can be sliced off later.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the tokens produced after the prompt. Splitting the full
    # decoded text on "### Response:" picks the wrong segment whenever the
    # model (or the user) emits that marker again.
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    # If the model starts inventing a follow-up turn, keep only its answer.
    response = response.split("### Instruction:")[0]

    return response.strip()

# Wrap chat_interface in a Gradio chat UI and launch it (debug + share on).
(
    gr.ChatInterface(
        title="🧠 MedAlpaca 7B - Medical Chatbot",
        description="Ask medical or wellness-related questions. Powered by medAlpaca 7B.",
        fn=chat_interface,
    )
    .launch(share=True, debug=True)
)


# ContactDoctor Bio Medical LLM

In [None]:
!pip install -q transformers accelerate bitsandbytes gradio flash_attn

In [None]:
import torch
from PIL import Image
import gradio as gr
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
from transformers import AutoModelForCausalLM

# ---- Model Setup ----
model_id = "ContactDoctor/Bio-Medical-Llama-3-8B"

# 4-bit NF4 quantization with double quantization; compute dtype is float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

print("🔄 Loading tokenizer...")
# NOTE(review): trust_remote_code=True lets the repo run its own Python code;
# confirm this checkpoint actually needs it.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

print("🔄 Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",
    quantization_config=bnb_config,
)

# ---- Chat Function ----
def chat_only_interface(message, history):
    """Answer ``message`` with structured wellbeing advice.

    The user's message is embedded in a fixed prompt template, the model
    generates a completion, and the exchange is appended to
    ``chat_history.txt``.

    Args:
        message: The latest user message.
        history: Prior turns supplied by Gradio; not used by this function.

    Returns:
        Only the text generated by the model (prompt excluded), stripped of
        surrounding whitespace.
    """
    # The template that wraps the user's message.
    prompt_template = f"""
  You are a wellbeing adviser. Respond to the user's condition using the following format:

  1. Food Recommendation:
  2. Physical Exercise:
  3. Social Wellbeing Recommendation:
  4. Overall Suggestion:

  User Input: "{message}"
  """
    inputs = tokenizer(prompt_template, return_tensors="pt").to(model.device)
    # Remember the prompt length so the echoed prompt can be sliced off.
    prompt_len = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.95,
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.1,
            # Same explicit padding choice as the other chat functions here.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Splitting the full text on
    # "User Input:" left the quoted user message at the start of the reply.
    generated_text = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    # Log the exchange; the "Model:" line holds the reply, not the prompt.
    with open("chat_history.txt", "a", encoding="utf-8") as f:
        f.write(f"User: {message}\n")
        f.write(f"Model: {generated_text}\n\n")

    return generated_text

# ---- Chat Interface ----
# Wrap chat_only_interface in a Gradio chat UI, enable the queue, and launch.
(
    gr.ChatInterface(
        title="🧠 Bio-Medical LLaMA 3 Chat",
        description="Ask wellness or medical-related questions.",
        fn=chat_only_interface,
    )
    .queue()
    .launch(share=True, debug=True)
)



# Medgemma


In [None]:
!huggingface-cli login

In [None]:
!pip install -q transformers accelerate gradio

In [None]:
from transformers import pipeline
import gradio as gr

# ---- Model Setup ----
model_id = "google/medgemma-4b-it"

print("🔄 Loading pipeline...")
chatbot = pipeline(
    "text-generation",
    model=model_id,  # reuse the constant instead of repeating the checkpoint name
    # NOTE(review): the original comment suggests this was switched from
    # float16 to float32 — presumably for numerical stability; confirm.
    torch_dtype="float32",
    device_map="auto",
)


# ---- Chat Function ----
def chat_interface(message, history):
    """Generate a MedGemma reply to ``message`` and log the exchange.

    Args:
        message: The latest user message, passed to the pipeline as the prompt.
        history: Prior turns supplied by Gradio; not used by this function.

    Returns:
        The generated completion without the echoed prompt.
    """
    results = chatbot(
        message,
        max_new_tokens=512,
        temperature=0.7,  # Lower = more stable
        top_p=0.9,
        repetition_penalty=1.1,
        do_sample=True,
        # By default the pipeline returns prompt + completion, which made the
        # bot repeat the user's question at the start of every reply.
        return_full_text=False,
    )
    response = results[0]["generated_text"]

    # Save history (optional)
    with open("chat_history_medgemma_pipeline.txt", "a", encoding="utf-8") as f:
        f.write(f"User: {message}\n")
        f.write(f"Model: {response}\n\n")

    return response

# ---- Gradio Chat Interface ----
# Title names the checkpoint actually loaded in this section
# (google/medgemma-4b-it); it previously said "27B".
gr.ChatInterface(
    fn=chat_interface,
    title="🧠 MedGemma 4B - Medical Chatbot",
    description="Ask medical or wellness-related questions.",
).queue().launch(debug=True, share=True)


# DeepSeek Medical Reasoning
- Device used to run the model: L4 GPU

In [None]:
!huggingface-cli login

In [None]:
!pip install -U datasets accelerate peft trl bitsandbytes
!pip install -U transformers==4.52.1
!pip install huggingface_hub


In [None]:
!pip install gradio

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Load the DeepSeek medical-reasoning tokenizer and model.
model_id = "kingabzpro/DeepSeek-R1-0528-Qwen3-8B-Medical-Reasoning"

print("🔄 Loading tokenizer and model...")
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Weights are loaded in float16 with device_map="auto"; switch torch_dtype to
# torch.float32 if NaN/Inf errors show up.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Chat function
def chat_interface(message, history):
    """Generate the DeepSeek model's reply to ``message`` and log the exchange.

    Args:
        message: The latest user message, used verbatim as the prompt.
        history: Prior turns supplied by Gradio; not used by this function.

    Returns:
        Only the newly generated text (prompt excluded), stripped of
        surrounding whitespace.
    """
    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    # Remember the prompt length so the echoed prompt can be sliced off.
    prompt_len = inputs["input_ids"].shape[-1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        repetition_penalty=1.1,
    )

    # Decode only the tokens after the prompt; decoding the whole sequence
    # made every reply start with the user's own question.
    response = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    # Optional: save chat history
    with open("chat_history_deepseek.txt", "a", encoding="utf-8") as f:
        f.write(f"User: {message}\nModel: {response}\n\n")

    return response

# Wrap chat_interface in a Gradio chat UI, enable the queue, and launch.
(
    gr.ChatInterface(
        title="🧠 DeepSeek Qwen3 8B - Medical Reasoning Chatbot",
        description="Ask medical reasoning or clinical diagnostic questions.",
        fn=chat_interface,
    )
    .queue()
    .launch(share=True, debug=True)
)


# BioMistral

In [None]:
!pip install transformers accelerate gradio torch
!pip install bitsandbytes

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Load the BioMistral tokenizer and weights (half precision).
model_id = "BioMistral/BioMistral-7B"
tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
# Use the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    model = model.to("cuda")
else:
    model = model.to("cpu")
model.eval()

# Chat function with history
def chat_interface(message, history):
    """Generate BioMistral's reply to ``message`` in the context of ``history``.

    Args:
        message: The latest user message.
        history: Earlier turns, iterated as ``(user_input, bot_output)`` pairs.
            NOTE(review): assumes Gradio passes tuple-style history; confirm
            against the installed Gradio version.

    Returns:
        Only the newly generated reply text, with surrounding whitespace
        stripped.
    """
    # Format the conversation with [INST] ... [/INST] markers.
    turns = [
        f"<s>[INST] {user_input} [/INST] {bot_output} </s>\n"
        for user_input, bot_output in history
    ]
    turns.append(f"<s>[INST] {message} [/INST]")
    prompt = "".join(turns)

    # The prompt already spells out its own <s>/</s> markers, so stop the
    # tokenizer from adding another BOS token in front of it.
    inputs = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)
    prompt_len = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the tokens after the prompt. Splitting the full decoded
    # text on "[/INST]" breaks if the model emits that marker itself.
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    return response.strip()

# Wrap chat_interface in a Gradio chat UI and launch it (debug + share on).
(
    gr.ChatInterface(
        title="🧬 BioMistral 7B - Biomedical Chatbot",
        description="Ask medical and biomedical questions. Powered by BioMistral-7B.",
        fn=chat_interface,
    )
    .launch(share=True, debug=True)
)
