In [2]:
# Gemma 3 Notebook: Download and Run Instruction-Tuned 1B Model

# 1️⃣ Install dependencies (run in a notebook cell)
! pip install -U transformers accelerate bitsandbytes torch --quiet

# 2️⃣ Import required libraries
from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig, Gemma3ForCausalLM
import torch

# 3️⃣ Choose model ID
MODEL_ID = "google/gemma-3-1b-it"  # instruction-tuned 1B

# 4️⃣ Set device (GPU if available)
device = 0 if torch.cuda.is_available() else -1
dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32



You should consider upgrading via the '/Users/nuzkhan/Downloads/medical chatbot/medchatbot/bin/python3 -m pip install --upgrade pip' command.[0m


In [3]:
# 5️⃣ Option 1: Use pipeline for quick testing
print("=== Using pipeline API ===")
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    device=device,
    torch_dtype=dtype
)

messages = [
    [
        {
            "role": "system",
            "content": [{"type": "text", "text": "You are a helpful assistant."}]
        },
        {
            "role": "user",
            "content": [{"type": "text", "text": "Explain the side effects of aspirin in simple terms."}]
        },
    ]
]

output = pipe(messages, max_new_tokens=100)
print("Pipeline Output:\n", output[0]['generated_text'])



=== Using pipeline API ===


Device set to use cpu


TypeError: list indices must be integers or slices, not str

In [None]:
# 6️⃣ Option 2: Load model manually with quantization (memory-efficient)
print("\n=== Loading model manually with 8-bit quantization ===")
quant_config = BitsAndBytesConfig(load_in_8bit=True)

model = Gemma3ForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=quant_config
).eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Prepare inputs
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt"
).to(model.device).to(dtype)

# Generate output
with torch.inference_mode():
    outputs = model.generate(**inputs, max_new_tokens=100)

decoded = tokenizer.batch_decode(outputs)
print("Manual Model Output:\n", decoded)


In [5]:
# =========================================
# Gemma 3 Medical Chatbot Notebook
# =========================================

# 1️⃣ Install required libraries (run once)
! pip install -U transformers accelerate bitsandbytes torch --quiet

# 2️⃣ Import required libraries
from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig, Gemma3ForCausalLM
import torch

# 3️⃣ Model ID for instruction-tuned 1B
MODEL_ID = "google/gemma-3-1b-it"

# 4️⃣ Set device (GPU if available)
device = 0 if torch.cuda.is_available() else -1
dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

# 5️⃣ Define chat messages with medical disclaimer
messages = [
    [
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "You are a helpful medical assistant. Always include a disclaimer: "
                            "'This information is for educational purposes only and not a substitute for professional medical advice.'"
                }
            ]
        },
        {
            "role": "user",
            "content": [{"type": "text", "text": "What are the side effects of aspirin?"}]
        },
    ]
]

# 6️⃣ Option 1: Quick test with pipeline API
print("=== Pipeline API Test ===")
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    device=device,
    torch_dtype=dtype
)








# Use the pipeline output correctly for chat-style inputs
output = pipe(messages, max_new_tokens=150, temperature=0.7, top_p=0.9)

# For instruction-tuned chat pipeline, the output is nested
# Usually output[0][0]['generated_text']
print("Pipeline Output:\n", output[0][0]['generated_text'])


# 7️⃣ Option 2: Manual model loading with 8-bit quantization (memory-efficient)
print("\n=== Manual Model Loading with 8-bit Quantization ===")
quant_config = BitsAndBytesConfig(load_in_8bit=True)

model = Gemma3ForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=quant_config
).eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Prepare inputs with chat template
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt"
).to(model.device).to(dtype)

# Generate output
with torch.inference_mode():
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        temperature=0.7,
        top_p=0.9
    )

decoded = tokenizer.batch_decode(outputs)
print("Manual Model Output:\n", decoded)

# =========================================
# ✅ Notes:
# - For longer answers, increase max_new_tokens
# - Adjust temperature/top_p for creativity vs. safety
# - For RAG integration: replace user prompt with retrieved medical data
# =========================================


You should consider upgrading via the '/Users/nuzkhan/Downloads/medical chatbot/medchatbot/bin/python3 -m pip install --upgrade pip' command.[0m
=== Pipeline API Test ===


Device set to use cpu


Pipeline Output:
 [{'role': 'system', 'content': [{'type': 'text', 'text': "You are a helpful medical assistant. Always include a disclaimer: 'This information is for educational purposes only and not a substitute for professional medical advice.'"}]}, {'role': 'user', 'content': [{'type': 'text', 'text': 'What are the side effects of aspirin?'}]}, {'role': 'assistant', 'content': "Okay, here’s information about the side effects of aspirin, presented in a helpful and informative way. **This information is for educational purposes only and not a substitute for professional medical advice.**  It’s important to discuss any concerns you have about aspirin with your doctor or pharmacist.\n\n**Side Effects of Aspirin**\n\nAspirin is a widely used medication with many benefits, but it can also cause side effects. It’s important to be aware of these potential issues, especially if you’re taking it regularly or have existing health conditions. Here's a breakdown of the common and less common si

ImportError: The installed version of bitsandbytes (<0.43.1) requires CUDA, but CUDA is not available. You may need to install PyTorch with CUDA support or upgrade bitsandbytes to >=0.43.1.

In [6]:
# =========================================
# Gemma 3 Medical Chatbot Notebook (Mac-friendly)
# =========================================

! pip install -U transformers accelerate torch --quiet

from transformers import pipeline, AutoTokenizer, Gemma3ForCausalLM
import torch

MODEL_ID = "google/gemma-3-1b-it"

# Device setup
device = -1  # CPU (Mac does not have CUDA)
dtype = torch.float32  # Use float32 on CPU

# Messages with medical disclaimer
messages = [
    [
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "You are a helpful medical assistant. Always include a disclaimer: "
                            "'This information is for educational purposes only and not a substitute for professional medical advice.'"
                }
            ]
        },
        {
            "role": "user",
            "content": [{"type": "text", "text": "What are the side effects of aspirin?"}]
        },
    ]
]

# 1️⃣ Using pipeline (easier, Mac-compatible)
print("=== Pipeline API ===")
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    device=device,
    torch_dtype=dtype
)

output = pipe(messages, max_new_tokens=150, temperature=0.7, top_p=0.9)
print("Pipeline Output:\n", output[0][0]['generated_text'])

# 2️⃣ Manual model loading without CUDA
print("\n=== Manual Model Loading (CPU) ===")
model = Gemma3ForCausalLM.from_pretrained(MODEL_ID).eval()
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt"
).to(dtype)

with torch.inference_mode():
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        temperature=0.7,
        top_p=0.9
    )

decoded = tokenizer.batch_decode(outputs)
print("Manual Model Output:\n", decoded)


You should consider upgrading via the '/Users/nuzkhan/Downloads/medical chatbot/medchatbot/bin/python3 -m pip install --upgrade pip' command.[0m
=== Pipeline API ===


Device set to use cpu


Pipeline Output:
 [{'role': 'system', 'content': [{'type': 'text', 'text': "You are a helpful medical assistant. Always include a disclaimer: 'This information is for educational purposes only and not a substitute for professional medical advice.'"}]}, {'role': 'user', 'content': [{'type': 'text', 'text': 'What are the side effects of aspirin?'}]}, {'role': 'assistant', 'content': 'Okay, I can certainly help you with that! Here’s a breakdown of the potential side effects of aspirin, presented in a helpful and informative way. **Please remember, this is for educational purposes only and not a substitute for professional medical advice.** Always consult with your doctor or healthcare provider before taking any medication, including aspirin.\n\n**Common Side Effects (Generally Mild):**\n\n*   **Stomach Upset:** This is the most common side effect. It can include nausea, heartburn, indigestion, bloating, and abdominal pain.\n*   **Increased Bleeding Risk:** This is the *most important* asp

Attempting to cast a BatchEncoding to type torch.float32. This is not supported.


Manual Model Output:
 ["<bos><start_of_turn>user\nYou are a helpful medical assistant. Always include a disclaimer: 'This information is for educational purposes only and not a substitute for professional medical advice.'\n\nWhat are the side effects of aspirin?<end_of_turn>\n<start_of_turn>model\nOkay, I can certainly help you with that! Here’s a breakdown of the potential side effects of aspirin, presented in a way that’s helpful and includes a disclaimer:\n\n**Side Effects of Aspirin**\n\nAspirin, while a widely used medication, can have a range of side effects. It’s important to remember that not everyone experiences them, and the severity can vary. Here’s a breakdown of common and less common side effects:\n\n**Common Side Effects (Generally Mild):**\n\n*   **Upset Stomach/Heartburn:** This is probably the most frequently reported side effect. Aspirin can irritate the stomach lining, leading to indigestion, nausea, or heartburn.\n*   **Bloating:** Some"]
