# Baseline model demo
This notebook shows a minimal example of loading an LLM from the Hugging Face Hub, preparing a prompt, and generating a response.
We will use `Open-Orca/Mistral-7B-OpenOrca`.

Notes:
- Ensure you have the model weights available or sufficient GPU memory.


In [None]:
# Imports
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# --- Configuration ---------------------------------------------------------
# Hub checkpoint to load, and the device to run on (GPU when one is visible).
MODEL_NAME = "Open-Orca/Mistral-7B-OpenOrca"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Question/answer style prompt; split across lines for readability only,
# the adjacent literals are concatenated into a single string.
prompt = (
    "Question: What methods and tools are used to assess the land-use "
    "suitability in the Kuala Terengganu coastal zone, and how does this "
    "information support urban planning and decision-making?\n"
    "Answer: "
)

# --- Tokenizer and model ---------------------------------------------------
# GPU: float16 weights with device_map="auto".
# CPU: float32 weights and no device map.
if DEVICE == "cuda":
    load_options = {"torch_dtype": torch.float16, "device_map": "auto"}
else:
    load_options = {"torch_dtype": torch.float32, "device_map": None}

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_options)

# Switch to evaluation mode before running generation.
model.eval()

In [None]:
# Find the device holding the model's first parameter and move the
# tokenized prompt there so inputs and weights live on the same device.
model_device = next(model.parameters()).device
inputs = tokenizer(prompt, return_tensors="pt").to(model_device)

# Number of prompt tokens; used below to cut the prompt off the output.
prompt_length = inputs["input_ids"].shape[1]

# A tokenizer may define no padding token, in which case pad_token_id is
# None. Fall back to the EOS id so generate() always receives a concrete
# padding id instead of None.
pad_token_id = tokenizer.pad_token_id
if pad_token_id is None:
    pad_token_id = tokenizer.eos_token_id

# generate (sampling disabled, at most 200 new tokens, no gradient tracking)
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=pad_token_id,
    )

# decode only the tokens that follow the prompt, dropping special tokens
answer = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
print("###### MODEL ANSWER ######")
print(answer.strip())