In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Filesystem locations of the pretrained Gemma checkpoint and its tokenizer.
MODEL_PATH = "/path/to/your/pretrained/gemma"  # Replace with the actual path to the model
TOKENIZER_PATH = "/path/to/your/tokenizer"     # Replace with the actual path to the tokenizer

# Tokenizer first, then the causal-LM weights.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH)
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH)

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = model.to(device)

# Function to prompt the model
def prompt_gemma(prompt_text, max_length=50):
    """
    Send a prompt to the Gemma model and return the decoded output text.

    Relies on the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        prompt_text (str): The input text for the model.
        max_length (int): Value forwarded to ``generate`` as ``max_length``.
            It bounds the total sequence length (prompt tokens plus newly
            generated tokens), so a prompt that already has ``max_length``
            tokens or more leaves no room for new text.

    Returns:
        str: The first generated sequence decoded with special tokens
            removed. For a causal LM this is the prompt followed by its
            continuation.
    """
    # Tokenize into PyTorch tensors and place them on the model's device.
    inputs = tokenizer(prompt_text, return_tensors="pt").to(device)

    # Forward the attention mask explicitly: the pad token is set to EOS
    # below, so generate() cannot reliably infer the mask from the ids alone
    # and would otherwise emit a warning. ``.get`` keeps this working for a
    # tokenizer that does not return a mask (None is generate()'s default).
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.get("attention_mask"),
        max_length=max_length,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Only one prompt was submitted, so the batch holds a single sequence.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage: send one sample question through prompt_gemma (using its
# default max_length) and print the text it returns.
prompt_text = "Explain the importance of site preparation in construction."
response = prompt_gemma(prompt_text)
print("Gemma's Response:", response)
