# In [None]:
# Import necessary libraries
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Locations of the locally stored QwenCoder weights and tokenizer files
model_path = "<path_to_your_qwencoder_model>"  # Replace with your local model path
tokenizer_path = "<path_to_your_qwencoder_tokenizer>"  # Replace with your tokenizer path

# Instantiate the tokenizer from its local directory
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Instantiate the causal language model, then place it on the chosen device
model = AutoModelForCausalLM.from_pretrained(model_path)
model = model.to(device)

# Define a function to generate responses from QwenCoder
def generate_response(system_prompt, user_prompt, max_length=512):
    """
    Generate a response using QwenCoder.

    The prompt is rendered as ``System: ... / User: ... / Assistant:`` and
    sampled from the module-level ``model`` using the module-level
    ``tokenizer`` and ``device``.

    Args:
        system_prompt (str): The system prompt providing instructions or context.
        user_prompt (str): The user input or query.
        max_length (int): Maximum number of newly generated tokens in the
            response (the prompt does not count against this budget).

    Returns:
        str: The generated response with surrounding whitespace stripped, or
        a string starting with "Error during generation:" when tokenization
        or generation raises ``ValueError``.
    """
    try:
        # Combine system prompt and user prompt
        full_prompt = f"System: {system_prompt}\nUser: {user_prompt}\nAssistant:"

        # Leave room in the context window for the reply: the prompt may use
        # at most (context size - reply budget) tokens, and never fewer than 1.
        prompt_budget = max(1, model.config.max_position_embeddings - max_length)

        # Tokenize the input with truncation and move it to the appropriate device
        inputs = tokenizer(
            full_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=prompt_budget,
        ).to(device)
        prompt_token_count = inputs["input_ids"].shape[-1]

        # Generate response. `max_new_tokens` bounds only the reply, whereas
        # `max_length` would also count the prompt tokens and could leave no
        # room for any output when the prompt is long.
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_length,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_k=50,
        )

        # A causal LM returns prompt + continuation; keep only the continuation
        # instead of searching the decoded text for "Assistant:", which breaks
        # when the user prompt or the reply itself contains that marker.
        generated_tokens = outputs[0][prompt_token_count:]
        response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
        return response.strip()

    except ValueError as e:
        return f"Error during generation: {str(e)}"

# Demo: ask the model a single question and print its reply
system_prompt = "You are a highly knowledgeable and friendly assistant."
user_prompt = "Can you explain how QwenCoder works?"
response = generate_response(system_prompt=system_prompt, user_prompt=user_prompt)
print(f"Assistant: {response}")