In [1]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
!pip install --upgrade pip

# Unsloth and dependencies
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# Training stack
!pip install --no-deps trl peft accelerate bitsandbytes datasets

Collecting pip
  Downloading pip-25.3-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.3-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.3
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-xgdwbldt/unsloth_b85675b296b342d0ab7de4d404626849
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-xgdwbldt/unsloth_b85675b296b342d0ab7de4d404626849
  Resolved https://github.com/unslothai/unsloth.git to commit 0fb14e6a76f3695d01314d7b3faf7252141d9f56
  Installing build depe

In [3]:
import torch
from unsloth import FastLanguageModel
from datasets import Dataset
from sklearn.model_selection import train_test_split
from trl import SFTTrainer
from transformers import TrainingArguments

device = "cuda" if torch.cuda.is_available() else "cpu"
device


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


'cuda'

In [4]:
ES_TRAIN_PATH = "/content/drive/My Drive/Machine Translation/Project/data/train/all-filtered.es.real.smalltrain"
EN_TRAIN_PATH = "/content/drive/My Drive/Machine Translation/Project/data/train/all-filtered.en.real.smalltrain"

with open(ES_TRAIN_PATH, encoding="utf-8") as f_es:
    es_sentences = [line.strip() for line in f_es]

with open(EN_TRAIN_PATH, encoding="utf-8") as f_en:
    en_sentences = [line.strip() for line in f_en]

print("Spanish lines:", len(es_sentences))
print("English lines:", len(en_sentences))

# Align and keep only the matching prefix
n = min(len(es_sentences), len(en_sentences))
es_sentences = es_sentences[:n]
en_sentences = en_sentences[:n]

print("Using", n, "parallel pairs")
print("Example ES:", es_sentences[0])
print("Example EN:", en_sentences[0])

Spanish lines: 10000
English lines: 10000
Using 10000 parallel pairs
Example ES: El consumo nocivo de alcohol es responsable por cerca de 3% de todas las muertes que ocurren en el planeta, incluyendo desde cirrosis y cáncer hepático hasta accidentes, caídas, intoxicaciones y homicidios.
Example EN: The harmful use of alcohol is responsible for about 3% of all deaths that occur on the planet, ranging from liver cancer and cirrhosis to accidents, falls, poisoning and murder.


In [5]:
MAX_TRAIN_PAIRS = 10_000
n = min(n, MAX_TRAIN_PAIRS)
es_sentences = es_sentences[:n]
en_sentences = en_sentences[:n]

In [6]:
data = [
    {"src": src, "tgt": tgt}
    for src, tgt in zip(es_sentences, en_sentences)
]

train_data, val_data = train_test_split(
    data,
    test_size=0.1,
    random_state=42,
)

train_dataset = Dataset.from_list(train_data)
val_dataset = Dataset.from_list(val_data)

train_dataset, val_dataset


(Dataset({
     features: ['src', 'tgt'],
     num_rows: 9000
 }),
 Dataset({
     features: ['src', 'tgt'],
     num_rows: 1000
 }))

In [7]:
MODEL_NAME = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"

max_seq_length = 512
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name     = MODEL_NAME,
    max_seq_length = max_seq_length,
    dtype          = dtype,
    load_in_4bit   = load_in_4bit,
)

tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token

print("Model loaded. Device:", next(model.parameters()).device)

==((====))==  Unsloth 2025.11.4: Fast Llama patching. Transformers: 4.57.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

Model loaded. Device: cuda:0


In [8]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,
    lora_alpha = 16,
    lora_dropout = 0.05,
    target_modules = [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

model.print_trainable_parameters()

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.11.4 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


trainable params: 20,971,520 || all params: 8,051,232,768 || trainable%: 0.2605


In [9]:
def add_text_column(example):
    src = example["src"]
    tgt = example["tgt"]

    messages = [
        {
            "role": "system",
            "content": "You are a professional translator that translates from Spanish to English.",
        },
        {
            "role": "user",
            "content": f"Translate this sentence from Spanish to English:\n{src}",
        },
        {
            "role": "assistant",
            "content": tgt,
        },
    ]

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False,
    )

    return {"text": text}


train_dataset_formatted = train_dataset.map(add_text_column)
val_dataset_formatted = val_dataset.map(add_text_column)

train_dataset_formatted = train_dataset_formatted.remove_columns(["src", "tgt"])
val_dataset_formatted = val_dataset_formatted.remove_columns(["src", "tgt"])

train_dataset_formatted[0]

Map:   0%|          | 0/9000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

{'text': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a professional translator that translates from Spanish to English.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nTranslate this sentence from Spanish to English:\nLa crisis financiera asociada con la pandemia de enfermedad por coronavirus de 2019-2020 tiene un impacto amplio y severo en los mercados financieros, incluidos los mercados de acciones, bonos y materias primas (incluidos el petróleo crudo y el oro).<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nEconomic turmoil associated with the 2019–20 coronavirus pandemic has wide-ranging and severe impacts upon financial markets, including stock, bond, and commodity (including crude oil and gold) markets.<|eot_id|>'}

In [10]:
output_dir = "/content/drive/My Drive/Machine Translation/Project/llama31_es_en_lora"

training_args = TrainingArguments(
    output_dir = output_dir,
    per_device_train_batch_size = 8,       # small batch for T4
    per_device_eval_batch_size  = 4,
    gradient_accumulation_steps = 8,      # effective batch size 32
    learning_rate = 2e-4,
    num_train_epochs = 3,
    warmup_ratio = 0.03,
    logging_steps = 50,
    eval_strategy = "steps",
    eval_steps = 200,
    save_steps = 200,
    save_total_limit = 2,
    bf16 = torch.cuda.is_bf16_supported(),
    fp16 = not torch.cuda.is_bf16_supported(),
    report_to = [],                       # no wandb
)

In [11]:
trainer = SFTTrainer(
    model = model,
    train_dataset = train_dataset_formatted,
    eval_dataset  = val_dataset_formatted,
    tokenizer = tokenizer,
    args = training_args,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
)

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/9000 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/1000 [00:00<?, ? examples/s]

In [12]:
trainer.train()

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 9,000 | Num Epochs = 3 | Total steps = 423
O^O/ \_/ \    Batch size per device = 8 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (8 x 8 x 1) = 64
 "-____-"     Trainable parameters = 20,971,520 of 8,051,232,768 (0.26% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss
200,0.8639,0.89163
400,0.7778,0.891801


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


TrainOutput(global_step=423, training_loss=0.9101785204371098, metrics={'train_runtime': 14809.2351, 'train_samples_per_second': 1.823, 'train_steps_per_second': 0.029, 'total_flos': 2.3604449603833037e+17, 'train_loss': 0.9101785204371098, 'epoch': 3.0})

In [13]:
save_dir = output_dir  # reuse same directory

trainer.model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

print("Saved LoRA adapter and tokenizer to", save_dir)

Saved LoRA adapter and tokenizer to /content/drive/My Drive/Machine Translation/Project/llama31_es_en_lora


In [14]:
# Simple sanity test for the fine-tuned model

test_sentence = "Me duele la cabeza desde ayer."

messages = [
    {
        "role": "system",
        "content": "You are a professional translator that translates from Spanish to English.",
    },
    {
        "role": "user",
        "content": f"Translate this sentence from Spanish to English:\n{test_sentence}",
    },
]

prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    max_length=512,
).to(model.device)

with torch.no_grad():
    gen_ids = model.generate(
        **inputs,
        max_new_tokens=64,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )

generated = tokenizer.decode(gen_ids[0][inputs["input_ids"].shape[1]:],
                             skip_special_tokens=True).strip()

print("Spanish :", test_sentence)
print("English :", generated)

Spanish : Me duele la cabeza desde ayer.
English : I have had a headache since yesterday.
