In [None]:
# Smoke test: confirm the kernel is running, then query the NVIDIA driver
# for the GPU(s) attached to this runtime.
print("Starting")
!nvidia-smi

Starting
Mon Jan  5 10:31:01 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   36C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                       

In [None]:
# Install Unsloth + training stack
!pip install --upgrade unsloth bitsandbytes accelerate xformers datasets peft trl sentencepiece protobuf PyPDF2

Collecting datasets
  Using cached datasets-4.4.2-py3-none-any.whl.metadata (19 kB)
Collecting trl
  Using cached trl-0.26.2-py3-none-any.whl.metadata (11 kB)


In [None]:
import unsloth
import torch
import PyPDF2
from datasets import Dataset
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
# Sanity check: report the installed Unsloth version and whether PyTorch
# can see a CUDA device.
print(f"Unsloth: {unsloth.__version__} CUDA: {torch.cuda.is_available()}")

Unsloth: 2026.1.1 CUDA: True


# Create a folder named `books` and add your PDF files to it

In [None]:
import PyPDF2
import os

# Extract the text of every page of every PDF in `books_dir`.
# Result: `text_chunks`, a list with one string per page that yielded text.
books_dir = "./books/"
text_chunks = []

if not os.path.isdir(books_dir):
    # Also covers the case where the path exists but is not a directory.
    print(f"Error: Directory {books_dir} does not exist.")
else:
    # os.listdir() returns entries in arbitrary order; sort so the page order
    # (and everything derived from it) is the same on every run.
    for filename in sorted(os.listdir(books_dir)):
        if not filename.lower().endswith(".pdf"):
            continue

        pdf_path = os.path.join(books_dir, filename)
        print(f"Processing: {filename}...")

        try:
            with open(pdf_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    text = page.extract_text()
                    # Skip pages with no text or only whitespace: they would
                    # turn into empty training records once stripped.
                    if text and text.strip():
                        text_chunks.append(text)
            print(f" - Completed {filename}")

        # Best effort: report the failing file and continue with the next one.
        # Pages already collected from that file before the error are kept.
        except PyPDF2.errors.PdfReadError:
            print(f" - Error: {filename} is corrupted or incomplete.")
        except Exception as e:
            print(f" - An error occurred with {filename}: {e}")

print(f"\nTotal extracted pages from all files: {len(text_chunks)}")

Processing: cuda-programming.pdf...
 - Completed cuda-programming.pdf
Processing: cudabook.pdf...
 - Completed cudabook.pdf
Processing: CUDA_C_Programming_Guide.pdf...
 - Completed CUDA_C_Programming_Guide.pdf
Processing: cuda_by_example.book.pdf...
 - Completed cuda_by_example.book.pdf

Total extracted pages from all files: 1700


In [None]:
# Convert pages to instruction format
SPLIT_SEED = 42  # fixed seed so the train/test split is identical on every run

# One record per extracted page: an empty "input" (no explicit prompt) and the
# whitespace-trimmed page text as "output".
data = [{"input": "", "output": page.strip()} for page in text_chunks]

# Hold out 5% of the pages as the "test" split; `dataset` ends up as a dict-like
# object with "train" and "test" entries.
dataset = Dataset.from_list(data).train_test_split(test_size=0.05, seed=SPLIT_SEED)

# Show the first training record as a quick sanity check.
dataset["train"][0]

{'input': '',
 'output': "ptg&&&222 000 333777 , , , 111 ***\x03\x03\x03, ,,6 6 67 7 72 2 2* * *5 5 5$ $ $0 0 06 6 6\n/one.lf/eight.lf/one.lf\x1c\x11\x17\x03888 +++\n6LQFH\x03ZH\x03GR\x03YHU\\\x03OLWWOH\x03ZRUN\x03LQ\x03WKH\x03NHUQHO\x0f\x03LW\x03LV\x03TXLWH\x03OLNHO\\\x03WKDW\x03WKH\x03DWRPLF\x03RSHUD -\nWLRQ\x03RQ\x03JOREDO\x03PHPRU\\\x03LV\x03FDXVLQJ\x03WKH\x03SUREOHP\x11\x03(VVHQWLDOO\\\x0f\x03ZKHQ\x03WKRXVDQGV\x03\nRI\x03WKUHDGV\x03DUH\x03WU\\LQJ\x03WR\x03DFFHVV\x03D\x03KDQGIXO\x03RI\x03PHPRU\\\x03ORFDWLRQV\x0f\x03D\x03JUHDW\x03GHDO\x03RI\x03\nFRQWHQWLRQ\x03IRU\x03RXU\x03\x15\x18\x19\x03KLVWRJUDP\x03ELQV\x03FDQ\x03RFFXU\x11\x037R\x03HQVXUH\x03DWRPLFLW\\\x03RI\x03WKH\x03LQFUH -\nPHQW\x03RSHUDWLRQV\x0f\x03WKH\x03KDUGZDUH\x03QHHGV\x03WR\x03VHULDOL]H\x03RSHUDWLRQV\x03WR\x03WKH\x03VDPH\x03PHPRU\\\x03\nORFDWLRQ\x11\x037KLV\x03FDQ\x03UHVXOW\x03LQ\x03D\x03ORQJ\x03TXHXH\x03RI\x03SHQGLQJ\x03RSHUDWLRQV\x0f\x03DQG\x03DQ\\\x03SHUIRU -\nPDQFH\x03JDLQ\x03ZH\x03PLJKW\x03KDYH\x03KDG\x03ZLOO\x03YDQ

In [None]:
# Base checkpoint and the maximum sequence length passed to the loader.
MODEL_NAME = "unsloth/Llama-3.2-3B-Instruct"
MAX_SEQ_LEN = 2048

# Load the base model together with its tokenizer.
# load_in_4bit=False means the weights are NOT loaded 4-bit quantized, so the
# adapters trained below are plain LoRA rather than QLoRA.
# NOTE(review): dtype=None presumably lets Unsloth choose the dtype for the
# detected GPU - confirm against the Unsloth documentation.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LEN,
    load_in_4bit=False,
    dtype=None,
)

==((====))==  Unsloth 2026.1.1: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

In [None]:
# LoRA hyper-parameters: adapter rank and scaling factor.
LORA_R = 16
LORA_ALPHA = 16

# Names of the modules that receive LoRA adapters.
TARGET_MODULES = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Attach LoRA adapters to the listed modules of the base model.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=TARGET_MODULES,
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
)

Unsloth: Already have LoRA adapters! We shall skip this step.


In [None]:
# 1. Define the formatting function
def formatting_prompts_func(examples):
    """Return the training text for every record in a batch.

    Args:
        examples: Mapping with an "output" key holding an iterable of page
            texts. The "input" column is not read because it is always empty.

    Returns:
        list[str]: One string per entry of ``examples["output"]``, formatted
        as-is with no prompt template wrapped around it.
    """
    return [f"{page_text}" for page_text in examples["output"]]

# 2. Training arguments
# NOTE(review): no evaluation strategy is set here, so the eval_dataset handed
# to the trainer may never be evaluated during training - confirm intent.
train_args = TrainingArguments(
    output_dir="./fine_tuned_llama3.2_book",
    num_train_epochs=50,             # 50 full passes over the training split
    learning_rate=1e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,   # 1 x 8 = 8 examples per optimizer step
    fp16=True,                       # float16 mixed-precision training
    logging_strategy="epoch",        # log metrics at the end of each epoch
    save_strategy="epoch",           # write a checkpoint at the end of each epoch
)

# 3. Build the SFT trainer. `formatting_prompts_func` supplies the text for
#    each record, taken from the dataset's "output" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=train_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    formatting_func=formatting_prompts_func,
    peft_config=None,  # adapters were already attached to `model` via get_peft_model
)

Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/1615 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/85 [00:00<?, ? examples/s]

In [None]:
# Run fine-tuning with the settings in `train_args`; checkpoints are written
# under its output_dir ("./fine_tuned_llama3.2_book").
trainer.train()

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,615 | Num Epochs = 50 | Total steps = 10,100
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 8 x 1) = 8
 "-____-"     Trainable parameters = 24,313,856 of 3,237,063,680 (0.75% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
202,1.9523


In [None]:
# Save the LoRA adapters and the tokenizer side by side.
# The epoch count is read from `train_args` (the notebook defines no standalone
# `num_train_epochs` variable) and formatted through an f-string, because
# concatenating a number onto a str raises TypeError.
# The directory suffix says "_steps" but carries the epoch count; the name is
# kept so it stays in line with the archive cell further down.
adapter_dir = f"./llama3.2_book_finetuned_{int(train_args.num_train_epochs)}_steps"
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

('./llama3.2_book_finetuned/tokenizer_config.json',
 './llama3.2_book_finetuned/special_tokens_map.json',
 './llama3.2_book_finetuned/chat_template.jinja',
 './llama3.2_book_finetuned/tokenizer.json')

In [None]:
# Save the full merged model (base model weights combined with the LoRA adapters).
# The target path is built once from `train_args.num_train_epochs`: the notebook
# defines no standalone `num_train_epochs` variable, and concatenating a number
# onto a str raises TypeError, so an f-string is used instead.
merged_dir = f"./llama3.2_book_finetuned_{int(train_args.num_train_epochs)}_steps_FULL"

model.save_pretrained_merged(
    merged_dir,
    tokenizer,
    save_method = "merged_16bit",  # Options: "merged_16bit", "merged_4bit", "lora"
)

print(f"Full model saved to {merged_dir}")

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub
Checking cache directory for required files...


Unsloth: Copying 2 files from cache to `llama3.2_book_finetuned_FULL`: 100%|██████████| 2/2 [01:46<00:00, 53.00s/it]


Successfully copied all 2 files from cache to `llama3.2_book_finetuned_FULL`
Checking cache directory for required files...
Cache check failed: tokenizer.model not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files: 100%|██████████| 2/2 [00:00<00:00, 18078.90it/s]
Unsloth: Merging weights into 16bit: 100%|██████████| 2/2 [02:57<00:00, 88.91s/it]


Unsloth: Merge process complete. Saved to `/content/llama3.2_book_finetuned_FULL`
Full model saved to ./llama3.2_book_finetuned_FULL


In [None]:
# Archive the merged-model directory so it can be downloaded as a single file.
# NOTE: the "50" in both names is hard-coded and must match the epoch count
# used when the merged model was saved.
!zip -r llama3.2_book_finetuned_50_steps.zip ./llama3.2_book_finetuned_50_steps_FULL

  adding: .config/ (stored 0%)
  adding: .config/default_configs.db (deflated 98%)
  adding: .config/config_sentinel (stored 0%)
  adding: .config/.last_update_check.json (deflated 22%)
  adding: .config/active_config (stored 0%)
  adding: .config/.last_survey_prompt.yaml (stored 0%)
  adding: .config/gce (stored 0%)
  adding: .config/configurations/ (stored 0%)
  adding: .config/configurations/config_default (deflated 15%)
  adding: .config/hidden_gcloud_config_universe_descriptor_data_cache_configs.db (deflated 97%)
  adding: .config/.last_opt_in_prompt.yaml (stored 0%)
  adding: .config/logs/ (stored 0%)
  adding: .config/logs/2025.12.09/ (stored 0%)
  adding: .config/logs/2025.12.09/14.41.43.412452.log (deflated 56%)
  adding: .config/logs/2025.12.09/14.41.42.675750.log (deflated 57%)
  adding: .config/logs/2025.12.09/14.41.18.717681.log (deflated 58%)
  adding: .config/logs/2025.12.09/14.41.27.893750.log (deflated 86%)
  adding: .config/logs/2025.12.09/14.40.47.605300.log (deflate

# Now you can download `llama3.2_book_finetuned_50_steps.zip` to get the full merged model