In [None]:
import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base model in half precision. `device_map="auto"` lets the
# library decide where the weights are placed.
# NOTE(review): this checkpoint is a vision-instruct model but is loaded via
# AutoModelForCausalLM — confirm this is the same model class the adapter was
# trained against, otherwise the LoRA weights may not attach as expected.
base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/Llama-3.2-11B-Vision-Instruct",  # Replace with your base model
    device_map="auto",
    torch_dtype=torch.float16,  # requires `import torch` (previously missing -> NameError)
)

# Wrap the base model with the LoRA adapter, registered under the name "my_lora".
model = PeftModel.from_pretrained(
    base_model,
    "MykMaks/llama32_1_MykMaks_fmsudgivelser",  # Your LoRA adapter
    adapter_name="my_lora",
)


In [None]:
# Merge LoRA weights into the base model and drop the adapter wrappers,
# leaving a plain model that can be saved and reloaded on its own.
merged_model = model.merge_and_unload()

# Single output directory for both the weights and the tokenizer files.
MERGED_MODEL_DIR = "merged_model"

# Save the merged model
merged_model.save_pretrained(MERGED_MODEL_DIR)

# The tokenizer must come from the same checkpoint as the base model that was
# merged; loading it from a different model family (previously
# "meta-llama/Llama-2-7b-hf") pairs the weights with the wrong vocabulary.
# Keep this ID in sync with the base model loaded earlier in the notebook.
tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-11B-Vision-Instruct")
tokenizer.save_pretrained(MERGED_MODEL_DIR)


In [None]:
# Test inference: run one prompt through the merged model as a smoke test.
MAX_NEW_TOKENS = 64  # explicit cap on generated tokens instead of the library default

input_text = "Translate to French: Hello, how are you?"

# Move the inputs to whatever device the model reports rather than assuming
# "cuda": the model was loaded with device_map="auto", so a hard-coded device
# fails on machines without CUDA or when the model lives elsewhere.
inputs = tokenizer(input_text, return_tensors="pt").to(merged_model.device)

outputs = merged_model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
