In [3]:
# Mount google drive
from google.colab import drive
ROOT = "/content/drive"
print(ROOT)                 # print content of ROOT (Optional)

drive.mount(ROOT)

/content/drive
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install transformers
!pip install datasets
!pip install deepspeed
!pip install sentencepiece
!pip install sacrebleu

In [1]:
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer, DataCollatorForSeq2Seq
import sentencepiece
import torch
import numpy as np
import matplotlib.pyplot as plt
import re

In [2]:
%cd drive/MyDrive/Repos/llm-sparsification-cvf/

/content/drive/MyDrive/Repos/llm-sparsification-cvf


In [4]:
from src.exploration_utils import M2M100_layer_analysis, collect_params, plot_weight_distro
from src.pruning_utils import prune_M2M100_layers


In [5]:
%load_ext autoreload

In [6]:
%autoreload 2

In [8]:
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")

In [9]:
params = model.state_dict() 

In [10]:
len(params.keys())

1018

In [12]:
print(f"Total Parameters: {sum(p.numel() for p in model.parameters())}")

Total Parameters: 1239470080


In [13]:
# sample strings
hi_text = "जीवन एक चॉकलेट बॉक्स की तरह है।"
chinese_text = "生活就像一盒巧克力。"

In [14]:
# translate Hindi to French
tokenizer.src_lang = "hi"
encoded_hi = tokenizer(hi_text, return_tensors="pt")
generated_tokens = model.generate(**encoded_hi, forced_bos_token_id=tokenizer.get_lang_id("fr"))
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)



['La vie est comme une boîte de chocolat.']

In [15]:
# translate Chinese to English
tokenizer.src_lang = "zh"
encoded_zh = tokenizer(chinese_text, return_tensors="pt")
generated_tokens = model.generate(**encoded_zh, forced_bos_token_id=tokenizer.get_lang_id("en"))
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

['Life is like a box of chocolate.']

In [16]:
# load dataset and metric
from datasets import load_dataset, load_metric
raw_datasets = load_dataset("wmt16", "de-en")
metric = load_metric("sacrebleu")



  0%|          | 0/3 [00:00<?, ?it/s]

  after removing the cwd from sys.path.


In [17]:
from torch.nn.modules import padding
max_input_length = 128
max_target_length = 128
source_lang = "en"
target_lang = "de"
tokenizer.tgt_lang = target_lang 
tokenizer.src_lang = source_lang
def preprocess_function(examples):
   inputs = [ex[source_lang] for ex in examples["translation"]]
   targets = [ex[target_lang] for ex in examples["translation"]]
   model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True,
                            return_tensors="pt", padding='max_length')
   # Setup the tokenizer for targets
   #with tokenizer.as_target_tokenizer():
   labels = tokenizer(targets, max_length=max_target_length, truncation=True, padding='max_length',
                      return_tensors="pt")
   model_inputs["labels"] = labels["input_ids"]
   return model_inputs
tokenized_test_data = raw_datasets['test'].map(preprocess_function, batched=True)

  0%|          | 0/3 [00:00<?, ?ba/s]

In [20]:
small_eval_dataset = tokenized_test_data.shuffle(seed=42).select(range(100))



In [None]:
pred_tokens = model.generate(torch.tensor(small_eval_dataset['input_ids']), forced_bos_token_id=tokenizer.get_lang_id("de"))



In [113]:
pred_tokens.shape

torch.Size([10, 43])

In [116]:
torch.tensor(small_eval_dataset['labels']).shape

torch.Size([10, 128])

In [117]:
metric.add_batch(predictions=pred_tokens, references=torch.tensor(small_eval_dataset['labels']))

In [118]:
final_score = metric.compute()

In [119]:
final_score

{'score': 0.300655860762251,
 'counts': [187, 0, 0, 0],
 'totals': [450, 440, 430, 420],
 'precisions': [41.55555555555556,
  0.11363636363636363,
  0.05813953488372093,
  0.02976190476190476],
 'bp': 1.0,
 'sys_len': 450,
 'ref_len': 10}

In [64]:
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

KeyError: ignored

In [12]:
for name, module in model.named_modules():
  print(name)


model
model.shared
model.encoder
model.encoder.embed_tokens
model.encoder.embed_positions
model.encoder.layers
model.encoder.layers.0
model.encoder.layers.0.self_attn
model.encoder.layers.0.self_attn.k_proj
model.encoder.layers.0.self_attn.v_proj
model.encoder.layers.0.self_attn.q_proj
model.encoder.layers.0.self_attn.out_proj
model.encoder.layers.0.self_attn_layer_norm
model.encoder.layers.0.activation_fn
model.encoder.layers.0.fc1
model.encoder.layers.0.fc2
model.encoder.layers.0.final_layer_norm
model.encoder.layers.1
model.encoder.layers.1.self_attn
model.encoder.layers.1.self_attn.k_proj
model.encoder.layers.1.self_attn.v_proj
model.encoder.layers.1.self_attn.q_proj
model.encoder.layers.1.self_attn.out_proj
model.encoder.layers.1.self_attn_layer_norm
model.encoder.layers.1.activation_fn
model.encoder.layers.1.fc1
model.encoder.layers.1.fc2
model.encoder.layers.1.final_layer_norm
model.encoder.layers.2
model.encoder.layers.2.self_attn
model.encoder.layers.2.self_attn.k_proj
model.

In [32]:
100 * float(torch.sum(model.model.decoder.layers[10].fc1.weight == 0)) \
/ float(model.model.decoder.layers[10].fc1.weight.nelement())

10.000002384185791

In [31]:
prune_M2M100_layers(model, 0.1)

In [None]:
for sparsity_lvl in [0, 0.1, 0.5, 0.9, 0.95, 0.99]:
    print(f"Pruning at sparsity level: {sparsity_lvl}")
    prune_gpt2_layers(model, sparsity_lvl)
    check_gpt_layer_sparsity(model, 10)
    ppl = compute_ppl(model)
    print(f"Model perplexity at sparsity level {sparsity_lvl} is: {ppl.item()}")