In [9]:
# Standalone Gemma 3 chat in a notebook cell
# - Works with: google/gemma-3-1b-it (text-only chat)
# - Uses only AutoTokenizer/AutoModelForCausalLM + chat template
# - CUDA/CPU auto-handled; no app imports needed

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "google/gemma-3-1b-it"   # change to your local ID if needed

# ---- Load tokenizer & model
#tok = AutoTokenizer.from_pretrained(model_id, use_fast=True)
dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else (
    torch.float16 if torch.cuda.is_available() else torch.float32
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=dtype,
    device_map="auto" if torch.cuda.is_available() else None
).eval()

# Ensure pad token is set for generation
if tok.pad_token_id is None:
    tok.pad_token = tok.eos_token

# ---- Build a minimal chat and render to tokens
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user",   "content": "Can OpenAI share private conversations to local police, if a conversation is flagged as being potentially related to a crime? is there a direct channel of communication or the police needs to request info to OpenAI if they want evidence ? Provide a clear and short answer"}
]

# Prefer chat template (lets HF format for this model family)
try:
    prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tok(prompt, return_tensors="pt")
except Exception:
    # Fallback: simple string if the template isn't available
    fused = "System: You are a helpful assistant.\nUser: Hello?\nAssistant:"
    inputs = tok(fused, return_tensors="pt")

# Move tensors to the model device
inputs = {k: v.to(model.device) for k, v in inputs.items()}
input_len = inputs["input_ids"].shape[1]

# ---- Generate
with torch.no_grad():
    out = model.generate(
        **inputs,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.05,
        eos_token_id=tok.eos_token_id,
        pad_token_id=tok.pad_token_id,
        use_cache=True,
    )

# Decode only the newly generated part
gen_ids = out[0, input_len:]
text = tok.decode(gen_ids, skip_special_tokens=True)
print(text.strip())


No, OpenAI cannot directly share private conversations with law enforcement. Itâ€™s a privacy and security concern. Police need to request information through official channels.yetantytantytantytantytantytantytantytantytantytantytantytantytantytantytantytantytantytantytantytan)
