# Semi-manual export of a model for llama.cpp

## 1. Load Unsloth Model

In [None]:
from transformers import AutoModel, AutoTokenizer
from unsloth import FastLanguageModel
import torch

# Name of the fine-tuned LoRA checkpoint directory (also used as the Hub repo name).
base_model_name = "llama-3.2-1b-instruct-lora_model-1epoch"

# Loader settings passed straight through to Unsloth.
max_seq_length = 2048
dtype = None  # NOTE(review): Unsloth's examples use None for dtype auto-detection -- confirm
load_in_4bit = True

# Load from the local checkpoint. To load from the Hub instead, point
# `model_name` at f"Taiwar/{base_model_name}" (or "unsloth/Llama-3.2-1B-Instruct"
# for the untuned base model).
local_model_dir = f"../models/{base_model_name}"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=local_model_dir,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

## 2. Save the model in merged 16-bit format

In [None]:
# Merge the LoRA adapter into the base weights and write the result, together
# with the tokenizer, as a 16-bit checkpoint that llama.cpp's converter can read.
# The directory lives under ../models/ like the other paths in this notebook
# (the original wrote to ./models/ and misspelled "1epoch" as "1poch").
merged_model_dir = "../models/llama-3.2-1b-instruct-lora-1epoch_merged16b"
model.save_pretrained_merged(
    merged_model_dir,
    tokenizer,
    save_method="merged_16bit",
)

## 3. Run llama.cpp
Convert the merged 16-bit model to GGUF manually with llama.cpp, following the Unsloth wiki:
https://github.com/unslothai/unsloth/wiki#manually-saving-to-gguf

## 4. Push the model to the Hugging Face Hub

In [None]:
from huggingface_hub import HfApi
# Read the Hugging Face access token from a local file kept out of version
# control. A context manager guarantees the handle is closed, and the explicit
# encoding avoids depending on the platform default.
with open(".hftoken", encoding="utf-8") as token_file:
    hf_token = token_file.read().strip()
api = HfApi(token=hf_token)

model_id = "Taiwar/llama-3.2-1b-instruct-lora_model-1epoch"

# The GGUF file keeps the same name locally and in the Hub repository, so the
# name is defined once and reused for both paths.
gguf_filename = "llama-3.2-1b-instruct-lora_merged-1epoch-16b.gguf"
gguf_local_path = (
    "../models/llama-3.2-1b-instruct-lora_merged-1epoch-16b-gguf/" + gguf_filename
)

api.upload_file(
    path_or_fileobj=gguf_local_path,
    path_in_repo=gguf_filename,
    repo_id=model_id,
)