# In [None]:
from qwencoder import QwenLMHeadModel, AutoTokenizer
import torch

# Initialize tokenizer and model
# MODEL_NAME is the single identifier handed to from_pretrained for both the
# tokenizer and the model, so the two can never be loaded from different
# sources by accident. Ensure 'Qwen' is the name of the locally installed
# model; adjust it here if needed.
MODEL_NAME = "Qwen"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = QwenLMHeadModel.from_pretrained(MODEL_NAME)

# Define a function for generating responses
def generate_response(system_prompt: str, user_prompt: str, max_length: int = 512) -> str:
    """
    Generate the assistant's reply to a system/user prompt pair.

    The prompts are joined into a single "System: ... / User: ... / Assistant:"
    text, tokenized with the module-level ``tokenizer`` and continued by the
    module-level ``model`` using sampling (temperature 0.7, top_p 0.9).

    Args:
        system_prompt (str): The system-level instruction for the model.
        user_prompt (str): The user input that the model responds to.
        max_length (int): Maximum number of tokens to generate for the
            response, not counting the prompt tokens.

    Returns:
        str: The generated response, without the prompt text and with
        surrounding whitespace stripped.
    """
    # Combine system and user prompts
    full_prompt = f"System: {system_prompt}\nUser: {user_prompt}\nAssistant:"

    # Tokenize input; prompts longer than 1024 tokens are truncated.
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=1024)
    input_ids = inputs["input_ids"]
    prompt_length = input_ids.shape[-1]

    # generate()'s max_length bounds prompt + continuation, so the prompt
    # length is added to keep the full max_length budget for the response
    # (otherwise a long prompt would leave no room to generate anything).
    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"],
            max_length=prompt_length + max_length,
            do_sample=True,  # Enable sampling for more diverse outputs
            temperature=0.7,  # Adjust temperature for creativity
            top_p=0.9,  # Nucleus sampling
        )

    # Decode only the tokens that follow the prompt instead of splitting the
    # decoded text on "Assistant:", which would cut the reply short whenever
    # the model itself emits that marker.
    # NOTE(review): assumes generate() returns the prompt tokens followed by
    # the continuation (as the original prompt-splitting logic implied).
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

# Example usage
if __name__ == "__main__":
    # Demo run: send one fixed question to the model and print its reply.
    demo_system = "You are a helpful assistant that provides concise and accurate responses."
    demo_question = "Can you explain how neural networks work?"

    reply = generate_response(system_prompt=demo_system, user_prompt=demo_question)

    # Header and reply go on separate lines, exactly as two print calls would.
    print("Assistant Response:", reply, sep="\n")
