In [1]:
!pip install peft bitsandbytes

^C


In [2]:
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training, LoraRuntimeConfig, AutoPeftModelForSeq2SeqLM
from torch.profiler import profile, record_function, ProfilerActivity
from transformers import BitsAndBytesConfig
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, GenerationConfig

In [4]:
torch.cuda.empty_cache()

In [5]:
## Quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_name = "facebook/nllb-200-distilled-600M"

model = AutoModelForSeq2SeqLM.from_pretrained(model_name, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang="eng_Latn", tgt_lang="arb_Arab")

`low_cpu_mem_usage` was None, now default to True since model is quantized.


In [8]:
sentences = [
    "Hello! It's been a while since we last spoke.",
    "What's up? Are you free this evening?",
    "Could you please help me with this task?",
    "Thank you so much for your kindness and support.",
    "Can you pass me the salt, please?",
    "I would rather stay home and read a good book tonight.",
    "I’m sorry for the misunderstanding. It wasn’t my intention.",
    "The sky is so clear and beautiful today.",
    "If I were you, I would reconsider that decision.",
    "He thought for a moment, then replied, 'I believe this is the best choice.'"
]

preds = model.generate(**tokenizer(sentences, return_tensors='pt', padding=True))
preds = tokenizer.batch_decode(preds, skip_special_tokens=True)



In [12]:
import pandas as pd

pd.DataFrame(columns=["Input", "Output"], data=[])

[('1', 1), ('2', 2), ('3', 3)]

In [143]:
from datasets import load_dataset
from transformers import DataCollatorForSeq2Seq

dataset = load_dataset("Abdulmohsena/Classic-Arabic-English-Language-Pairs")['train']
preprocess_function = lambda examples: tokenizer(
    examples['en'], text_target=examples['ar'], max_length=256, truncation=True, padding=False)
tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=['en', 'ar']).shuffle()
tokenized_dataset = tokenized_dataset.train_test_split(test_size=0.20)



model.safetensors:   0%|          | 0.00/2.46G [00:00<?, ?B/s]

In [144]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model, padding='max_length', max_length=256, pad_to_multiple_of=8, return_tensors='pt')
train_dataloader = torch.utils.data.DataLoader(tokenized_dataset["train"], batch_size=2, shuffle=True, collate_fn=data_collator, pin_memory=True)


In [161]:
sample = next(iter(train_dataloader))

In [162]:
batch = {k: v.to('cuda') for k, v in sample.items()}

In [163]:
print(batch['input_ids'].shape)

torch.Size([2, 256])


In [164]:
tokenizer.batch_decode(batch['input_ids'], skip_special_tokens=True)

['He soon left.', 'nor does he speak out of desire.']

In [165]:
batch['input_ids']

tensor([[256047,   1808,  48072,  37814, 248075,      2,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,      1,      1,      1,      1,      1,      1,      1,
              1,      1,    

In [150]:
model.config.forced_bos_token_id = 256011

In [188]:
model.eval()
with torch.inference_mode():
    text = "Peace and blessings be upon him"
    
    output = model(**batch) 
    print(tokenizer.batch_decode(output.logits.argmax(dim=-1), skip_special_tokens=True))
    
    output = model.generate(**batch)
    print(tokenizer.batch_decode(output, skip_special_tokens=True))

['لقد يَس وقت يصرف', 'ولا يتطق من رغوى.ة']
['لقد غادر قريباً', 'ولا يتكلم عن رغبة في ذلك.']


In [185]:
tokenizer.batch_decode(torch.stack(output.scores).permute(1, 0, 2).argmax(dim=-1))

['arb_Arab لقد غادر قريباً</s>ces_Latn</s></s>',
 'arb_Arab ولا يتكلم عن رغبة في ذلك.</s>']

In [158]:
output.shape

torch.Size([2, 57])

In [113]:
batch.keys()

dict_keys(['input_ids', 'attention_mask', 'labels', 'decoder_input_ids'])

In [117]:
tokenizer.batch_decode(output.logits.argmax(dim=-1) * batch['attention_mask'], skip_special_tokens=True)

['وكان الرأسّ الرأس " آرّ » بسبب جمع فيه من ألوان الجميلة .سس',
 'ومنعلوم أنه ليست كذلك.دد']

In [33]:
output.logits.argmax(dim=-1).shape

torch.Size([4, 256])

In [11]:


# model = prepare_model_for_kbit_training(model) # prepares the whole model for kbit training

# for param in model.parameters():
#     param.requires_grad = False  # freeze the model - train adapters later
#     if param.ndim == 1:
#         # cast the small parameters (e.g. layernorm) to fp32 for stability
#         param.data = param.data.to(torch.float32)
    
## Low Rank Adaptation
lora_config = LoraConfig(
    use_dora=True,
    init_lora_weights="gaussian",
    task_type=TaskType.SEQ_2_SEQ_LM,
    inference_mode=False,
    bias="none",
    r=32, 
    lora_alpha=32, 
    lora_dropout=0.05,
    target_modules=['fc1']
)

model.enable_input_require_grads()
model.gradient_checkpointing_enable()
model = get_peft_model(model, lora_config)


# for param in model.get_base_model().model.encoder.parameters():
#     param.requires_grad = False

model.print_trainable_parameters()

# # Pruning, not valid because we need a sparse util
# # for name, module in model.named_modules():
# #     if isinstance(module, (torch.nn.Linear, torch.nn.Embedding)):
# #         prune.l1_unstructured(module, name='weight', amount=0.4)
# #         prune.remove(module, 'weight')

# # # https://huggingface.co/docs/optimum/en/concept_guides/quantization
# # # https://huggingface.co/docs/peft/en/index
# # # https://github.com/huggingface/peft/blob/main/examples/fp4_finetuning/finetune_fp4_opt_bnb_peft.py

trainable params: 4,030,464 || all params: 619,104,256 || trainable%: 0.6510


In [13]:
model.eval().print_trainable_parameters()

trainable params: 4,030,464 || all params: 619,104,256 || trainable%: 0.6510


In [6]:
for param in model.get_base_model().model.encoder.parameters():
    param.requires_grad = False
    
model.print_trainable_parameters()

trainable params: 27,156,480 || all params: 1,418,561,536 || trainable%: 1.9144


In [15]:
n = model.merge_and_unload()

In [19]:
n.train()

M2M100ForConditionalGeneration(
  (model): M2M100Model(
    (shared): M2M100ScaledWordEmbedding(256206, 1024, padding_idx=1)
    (encoder): M2M100Encoder(
      (embed_tokens): M2M100ScaledWordEmbedding(256206, 1024, padding_idx=1)
      (embed_positions): M2M100SinusoidalPositionalEmbedding()
      (layers): ModuleList(
        (0-11): 12 x M2M100EncoderLayer(
          (self_attn): M2M100SdpaAttention(
            (k_proj): Linear4bit(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear4bit(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear4bit(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear4bit(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): ReLU()
          (fc1): Linear4bit(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear4bit(in_features=4096, out_featu

In [22]:
sum([i.numel() for i in n.parameters() if i.requires_grad])

0

In [10]:
model.train().print_trainable_parameters()

trainable params: 17,571,840 || all params: 632,645,632 || trainable%: 2.7775


In [7]:
model.push_to_hub(f"AbdulmohsenA/Faseeh_LoRA")

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


adapter_model.safetensors:   0%|          | 0.00/192M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Abdulmohsena/Faseeh_LoRA/commit/2bf19a30d109994abf5a4b644701cd90b6d01393', commit_message='Upload model', commit_description='', oid='2bf19a30d109994abf5a4b644701cd90b6d01393', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Abdulmohsena/Faseeh_LoRA', endpoint='https://huggingface.co', repo_type='model', repo_id='Abdulmohsena/Faseeh_LoRA'), pr_revision=None, pr_num=None)

In [11]:
tokenizer.batch_decode(model.generate(**tokenizer("Who dis?", return_tensors='pt')))



['</s>ell_Grek Ποιος είναι;</s>']