In [81]:
import torch
import pandas as pd
from datasets import load_dataset, load_from_disk
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType

In [82]:
# dataset = load_dataset("fotiecodes/jarvis-llama2-dataset")
# dataset.save_to_disk("datasets")

In [83]:
dataset = load_from_disk("datasets/train/")
dataset

Dataset({
    features: ['text'],
    num_rows: 230
})

In [84]:
df = pd.DataFrame(dataset)
print(df.columns)
print(df['text'])
print(type(df['text']))

Index(['text'], dtype='object')
0      <s>[INST] Do you need further clarification? [...
1      <s>[INST] Jarvis, what do you believe is the m...
2      <s>[INST] Jarvis, what do you admire most abou...
3      <s>[INST] Do you ever get bored? [/INST] I don...
4      <s>[INST] What are you? [/INST] I am an artifi...
                             ...                        
225    <s>[INST] Hey there! [/INST] Greetings! What c...
226    <s>[INST] What's new with you? [/INST] Just he...
227    <s>[INST] Jarvis, what makes humans so special...
228    <s>[INST] Jarvis, what do you think is the mos...
229    <s>[INST] Do you have any siblings? [/INST] I ...
Name: text, Length: 230, dtype: object
<class 'pandas.core.series.Series'>


GPU- Cuda

In [85]:
import torch.version


print("Pytorch version: ", torch.__version__)
print("Cuda version:", torch.version.cuda)
print("No of GPUs: ", torch.cuda.device_count())
print("GPU Name: ", torch.cuda.get_device_name())
device = "cuda" if torch.cuda.is_available() else "cpu"

Pytorch version:  2.7.1+cu126
Cuda version: 12.6
No of GPUs:  1
GPU Name:  NVIDIA GeForce RTX 2050


#### Quantization configurations

In [86]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
)

#### Device & Model configurations

In [87]:
model_name = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

#### LoRA configurations

In [None]:
lora_config = LoraConfig(
    r = 8,  # rank
    
    lora_alpha=16,      # scaling factor: after training the ouput of LoRA module will be scaled by 16/8 = 2.
    
    target_modules=['q_proj', 'v_proj'],    # which layers to modify. here- query & value projection layers.
    
    lora_dropout=0.05,      # regularization to prevent overfitting. here- 5% dropout(randomly zeros out parts of the LoRA weights during training)
    
    bias='none',        # here no bias terms will be added.
    
    task_type=TaskType.CAUSAL_LM    # CAUSAL_LM are models that predict the next token based on past tokens
)

In [89]:
model = get_peft_model(model, lora_config)

#### Q&A Training

In [None]:
def tokenize(batch):
    tokens = tokenizer(
        batch['text'],
        truncation=True,    # Ensures that if the tokenized sequence is longer than max_length, it will be cut off
        padding='max_length',
        max_length=256,
        return_tensors = 'pt' # returns a Pytorch Tensor
    )
    tokens['labels'] = tokens['input_ids'].clone()
    return tokens

In [91]:
tokenized_dataset = dataset.map(
    tokenize,
    batched=True,
    remove_columns=dataset.column_names
)

In [92]:
training_args = TrainingArguments(
    output_dir='trained_model/echo-first-trained',
    per_device_train_batch_size=3,
    gradient_accumulation_steps=3,
    learning_rate=1e-3,
    num_train_epochs=50,
    fp16=True,
    logging_steps=20,   # watch the status at every 20th epoch
    save_strategy='epoch',
    report_to='none',
    remove_unused_columns=False,
    label_names=['labels']
)

In [93]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    processing_class=tokenizer
)

In [94]:
trainer.train()

Step,Training Loss
20,4.0034
40,0.2703
60,0.1952
80,0.1692
100,0.1352
120,0.1112
140,0.1015
160,0.0847
180,0.0767
200,0.0666




TrainOutput(global_step=1300, training_loss=0.10785927295684815, metrics={'train_runtime': 3550.3513, 'train_samples_per_second': 3.239, 'train_steps_per_second': 0.366, 'total_flos': 1.8293523480576e+16, 'train_loss': 0.10785927295684815, 'epoch': 50.0})

In [None]:
model.save_pretrained("trained_model/echo-tinyllama-lora-adapter_finetuned")
tokenizer.save_pretrained("trained_model/echo-tinyllama-lora-adapter_finetuned")

"""What we just saved:
    1. The folder contains only the LoRA adapter weights and configuration,
    2. Tokenizer files.
    
    But: 
    *** The base model itself is not changed; the adapter is loaded and used together with it. ***
    i.e. adapter weights are applied on top of the base model at runtime. 
    
    So now we need to merge the adapters & the base model get a standalone finetuned model.
"""

('trained_model/echo-tinyllama-lora-adapter_finetuned\\tokenizer_config.json',
 'trained_model/echo-tinyllama-lora-adapter_finetuned\\special_tokens_map.json',
 'trained_model/echo-tinyllama-lora-adapter_finetuned\\chat_template.jinja',
 'trained_model/echo-tinyllama-lora-adapter_finetuned\\tokenizer.json')