In [1]:
%%capture
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
from torch import __version__ as torch_version
from packaging.version import Version as V

xformers = "xformers==0.0.27" if V(torch_version) < V("2.4.0") else "xformers"
!pip install --no-deps {xformers} trl peft accelerate bitsandbytes datasets


In [2]:
import torch
from datasets import Dataset
from unsloth import FastLanguageModel, is_bfloat16_supported
from transformers import TrainingArguments
from trl import SFTTrainer

device = "cuda" if torch.cuda.is_available() else "cpu"
max_seq_length = 512
dtype = None


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
model_name = "unsloth/SmolLM2-135M-Instruct"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name       = model_name,
    max_seq_length   = max_seq_length,
    dtype            = dtype,
    load_in_4bit     = False,      # full weights, small model
    full_finetuning  = True,       # we want to update all weights
)

FastLanguageModel.for_training(model)
model.to(device)


==((====))==  Unsloth 2025.11.2: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Float16 full finetuning uses more memory since we upcast weights to float32.


model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/158 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/423 [00:00<?, ?B/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(49153, 576, padding_idx=49152)
    (layers): ModuleList(
      (0-29): 30 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=576, out_features=576, bias=False)
          (k_proj): Linear(in_features=576, out_features=192, bias=False)
          (v_proj): Linear(in_features=576, out_features=192, bias=False)
          (o_proj): Linear(in_features=576, out_features=576, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=576, out_features=1536, bias=False)
          (up_proj): Linear(in_features=576, out_features=1536, bias=False)
          (down_proj): Linear(in_features=1536, out_features=576, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm((576,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((576,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((576,), eps=1e-05)
  

In [4]:
texts = [
    "यह एक साधारण हिंदी वाक्य है जो भाषा सीखने के लिए लिखा गया है।",
    "कृत्रिम बुद्धिमत्ता हमारे दैनिक जीवन में कई समस्याओं को हल करने में मदद कर रही है।",
    "आप हर दिन थोड़ा अभ्यास करेंगे तो धीरे-धीरे आपकी भाषा कौशल बेहतर हो जाएगी।",
    "डेटा साइंस और मशीन लर्निंग आधुनिक दुनिया में महत्वपूर्ण कौशल बन चुके हैं।",
    "परीक्षा से पहले शांत रहना और अच्छे नोट्स दोहराना बहुत ज़रूरी है।",
    # add more lines in the target language
]

cpt_dataset = Dataset.from_dict({"text": texts})
cpt_dataset


Dataset({
    features: ['text'],
    num_rows: 5
})

In [5]:
def tokenize_func(examples):
    return tokenizer(
        examples["text"],
        truncation = True,
        max_length = max_seq_length,
    )

tokenized_cpt = cpt_dataset.map(
    tokenize_func,
    batched = True,
    remove_columns = ["text"],
)
tokenized_cpt


Map:   0%|          | 0/5 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 5
})

In [6]:
# Wrap into a dataset with a 'text' field that SFTTrainer expects
def ids_to_text(examples):
    # decode is only for debugging; Trainer will use labels from tokenization anyway
    decoded = [tokenizer.decode(ids, skip_special_tokens=True)
               for ids in examples["input_ids"]]
    return {"text": decoded}

cpt_text_ds = tokenized_cpt.map(ids_to_text, batched=True)
print(cpt_text_ds[0]["text"])


Map:   0%|          | 0/5 [00:00<?, ? examples/s]

यह एक साधारण हिंदी वाक्य है जो भाषा सीखने के लिए लिखा गया है।


In [7]:
training_args = TrainingArguments(
    output_dir                  = "smollm2-135m-cpt-newlang",
    num_train_epochs            = 5,
    per_device_train_batch_size = 4,
    gradient_accumulation_steps = 1,
    learning_rate               = 2e-4,
    warmup_steps                = 5,
    logging_steps               = 5,
    save_strategy               = "epoch",
    fp16                        = True,
    report_to                   = "none",
)

cpt_trainer = SFTTrainer(
    model              = model,
    tokenizer          = tokenizer,
    train_dataset      = cpt_text_ds,
    dataset_text_field = "text",
    max_seq_length     = max_seq_length,
    packing            = False,
    args               = training_args,
)

cpt_trainer.train()


The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 5 | Num Epochs = 5 | Total steps = 10
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 1 x 1) = 4
 "-____-"     Trainable parameters = 134,515,584 of 134,515,584 (100.00% trained)


Step,Training Loss
5,2.1243
10,1.1529


TrainOutput(global_step=10, training_loss=1.6386237144470215, metrics={'train_runtime': 181.518, 'train_samples_per_second': 0.138, 'train_steps_per_second': 0.055, 'total_flos': 1354094064000.0, 'train_loss': 1.6386237144470215, 'epoch': 5.0})

In [8]:
FastLanguageModel.for_inference(model)
inference_dtype = torch.bfloat16 if is_bfloat16_supported() else torch.float16
model = model.to(device=device, dtype=inference_dtype)

def gen_new_lang(prompt, max_new_tokens=80):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens = max_new_tokens,
            do_sample      = True,
            top_p          = 0.9,
            temperature    = 0.8,
        )
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

# Try prompts in that language
gen_new_lang("कृत्रिम बुद्धिमत्ता के बारे में दो वाक्य लिखिए।")
gen_new_lang("परीक्षा की तैयारी करते समय छात्र को क्या ध्यान रखना चाहिए?")


कृत्रिम बुद्धिमत्ता के बारे में दो वाक्य लिखिए। बहुत दोहराने के लिए अभ्यास करेंगी।। जो जो दोहराने के लिए अभ्यास कर रहना �
परीक्षा की तैयारी करते समय छात्र को क्या ध्यान रखना चाहिए? है। एक लिए एक लिए महत्वपूर्ण है। एक संत्तारी साइएगी।।।।।।।।�


In [9]:
cpt_save_dir = "smollm2-135m-cpt-newlang-final"
model.save_pretrained(cpt_save_dir)
tokenizer.save_pretrained(cpt_save_dir)
print(f"Saved CPT model to {cpt_save_dir}")


Saved CPT model to smollm2-135m-cpt-newlang-final
