In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
!pip install -q transformers datasets peft accelerate bitsandbytes sentencepiece scikit-learn


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m125.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m86.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m57.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import LoraConfig, get_peft_model, TaskType

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Replace with actual TinyLlama HF repo or path

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model with 4-bit quantization for efficiency if desired
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [3]:
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split

data = pd.read_csv("/content/drive/MyDrive/NvidiaDocumentationQandApairs.csv")

# Optional: Clean text as you did earlier (lowercase, remove non-alphanumeric)

def clean_text(text):
    import re
    text = text.lower()
    text = re.sub('[^A-Za-z0-9\s]+', '', text)
    return text

data['question'] = data['question'].apply(clean_text)
data['answer'] = data['answer'].apply(clean_text)

# Create a new column with prompt+answer concatenated
def build_prompt(q, a):
    return f"Question: {q}\nAnswer: {a}"

data['input_text'] = data.apply(lambda row: build_prompt(row['question'], row['answer']), axis=1)

# Split train/val/test
train_df, test_df = train_test_split(data, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=42)

train_ds = Dataset.from_pandas(train_df)
val_ds = Dataset.from_pandas(val_df)
test_ds = Dataset.from_pandas(test_df)


In [4]:
def tokenize_function(examples):
    # Tokenize full prompt+answer text
    tokenized = tokenizer(
        examples["input_text"],
        truncation=True,
        max_length=256,
        padding="max_length",
    )
    # Labels are same as input_ids (causal LM)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

train_tokenized = train_ds.map(tokenize_function, batched=True, remove_columns=train_ds.column_names)
val_tokenized = val_ds.map(tokenize_function, batched=True, remove_columns=val_ds.column_names)
test_tokenized = test_ds.map(tokenize_function, batched=True, remove_columns=test_ds.column_names)


Map:   0%|          | 0/4975 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

Map:   0%|          | 0/1067 [00:00<?, ? examples/s]

In [5]:
lora_config = LoraConfig(
    r=16,                 # smaller rank for tiny model
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # typical for LLaMA; confirm correct module names for TinyLlama
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)

def print_trainable_params(model):
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    all_params = sum(p.numel() for p in model.parameters())
    print(f"Trainable params: {trainable_params} / {all_params} ({100*trainable_params/all_params:.2f}%)")

print_trainable_params(model)


Trainable params: 2252800 / 617859072 (0.36%)


In [11]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/tinyllama-lora-sft",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=4,
    learning_rate=2e-4,
    logging_steps=50,
    save_total_limit=2,
    fp16=torch.cuda.is_available(),
    load_best_model_at_end=True,
    report_to="none",
    label_names=["labels"],

)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,  # causal LM
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized,
    tokenizer=tokenizer,
    data_collator=data_collator,
)


  trainer = Trainer(


In [12]:
trainer.train()


Epoch,Training Loss,Validation Loss
1,1.8127,1.91078
2,1.692,1.867759
3,1.608,1.851969
4,1.5496,1.859634


TrainOutput(global_step=2488, training_loss=1.6706181385126144, metrics={'train_runtime': 1700.8417, 'train_samples_per_second': 11.7, 'train_steps_per_second': 1.463, 'total_flos': 3.16901793202176e+16, 'train_loss': 1.6706181385126144, 'epoch': 4.0})

In [13]:
output_dir = "/content/drive/MyDrive/tinyllama-lora-sft-tuned-model"

# Save the PEFT model (includes LoRA weights)
trainer.model.save_pretrained(output_dir)

# Save the tokenizer as well
tokenizer.save_pretrained(output_dir)


('/content/drive/MyDrive/tinyllama-lora-sft-tuned-model/tokenizer_config.json',
 '/content/drive/MyDrive/tinyllama-lora-sft-tuned-model/special_tokens_map.json',
 '/content/drive/MyDrive/tinyllama-lora-sft-tuned-model/chat_template.jinja',
 '/content/drive/MyDrive/tinyllama-lora-sft-tuned-model/tokenizer.model',
 '/content/drive/MyDrive/tinyllama-lora-sft-tuned-model/added_tokens.json',
 '/content/drive/MyDrive/tinyllama-lora-sft-tuned-model/tokenizer.json')