In [1]:
# Mount google drive
from google.colab import drive
ROOT = "/content/drive"
print(ROOT)                 # print content of ROOT (Optional)

drive.mount(ROOT)

/content/drive
Mounted at /content/drive


In [None]:
!pip install transformers
!pip install datasets
!pip install deepspeed
!pip install sentencepiece
!pip install sacrebleu

In [2]:
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer, DataCollatorForSeq2Seq
import sentencepiece
import torch
import numpy as np
import matplotlib.pyplot as plt
import re

In [3]:
%cd drive/MyDrive/Repos/llm-sparsification-cvf/

/content/drive/MyDrive/Repos/llm-sparsification-cvf


In [4]:
from src.exploration_utils import M2M100_layer_analysis, collect_params, plot_weight_distro
from src.pruning_utils import prune_M2M100_layers

In [5]:
%load_ext autoreload

In [6]:
%autoreload 2

In [7]:
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")

In [None]:
params = model.state_dict()
cuda = torch.device('cuda')  
cpu = torch.device('cpu')
#model.to(cuda)

In [9]:
len(params.keys())

1018

In [10]:
print(f"Total Parameters: {sum(p.numel() for p in model.parameters())}")

Total Parameters: 1239470080


In [13]:
# sample strings
hi_text = "जीवन एक चॉकलेट बॉक्स की तरह है।"
chinese_text = "生活就像一盒巧克力。"

In [14]:
# translate Hindi to French
tokenizer.src_lang = "hi"
encoded_hi = tokenizer(hi_text, return_tensors="pt")
generated_tokens = model.generate(**encoded_hi, forced_bos_token_id=tokenizer.get_lang_id("fr"))
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)



['La vie est comme une boîte de chocolat.']

In [15]:
# translate Chinese to English
tokenizer.src_lang = "zh"
encoded_zh = tokenizer(chinese_text, return_tensors="pt")
generated_tokens = model.generate(**encoded_zh, forced_bos_token_id=tokenizer.get_lang_id("en"))
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

['Life is like a box of chocolate.']

In [11]:
# load dataset and metric for evaluation
from datasets import load_dataset, load_metric
raw_datasets = load_dataset("wmt16", "de-en")
metric = load_metric("sacrebleu")



  0%|          | 0/3 [00:00<?, ?it/s]

  after removing the cwd from sys.path.


In [12]:
from torch.nn.modules import padding
max_input_length = 64
max_target_length = 64
source_lang = "en"
target_lang = "de"
tokenizer.tgt_lang = target_lang 
tokenizer.src_lang = source_lang
def preprocess_function(examples):
   inputs = [ex[source_lang] for ex in examples["translation"]]
   targets = [ex[target_lang] for ex in examples["translation"]]
   model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True,
                            return_tensors="pt", padding='max_length')
   # Setup the tokenizer for targets
   #with tokenizer.as_target_tokenizer():
   labels = tokenizer(targets, max_length=max_target_length, truncation=True, padding='max_length',
                      return_tensors="pt")
   model_inputs["labels"] = labels["input_ids"]
   return model_inputs
tokenized_test_data = raw_datasets['test'].map(preprocess_function, batched=True)

  0%|          | 0/3 [00:00<?, ?ba/s]

In [20]:
small_eval_dataset = tokenized_test_data.shuffle(seed=42).select(range(10))



In [37]:
def evaluate_model(model):
    pred_tokens = model.generate(torch.tensor(small_eval_dataset['input_ids']),
                             forced_bos_token_id=tokenizer.get_lang_id("de"),
                             max_length=64)
    final_score = metric.compute(predictions=pred_tokens,
                                 references=torch.tensor(small_eval_dataset['labels']))
    return final_score['score']

In [None]:
#tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

In [34]:
100 * float(torch.sum(model.model.decoder.layers[10].fc1.weight == 0)) \
/ float(model.model.decoder.layers[10].fc1.weight.nelement())

5.9604644775390625e-05

In [38]:
for sparsity_lvl in [0, 0.1, 0.5, 0.9, 0.95, 0.99]:
    print(f"Pruning at sparsity level: {sparsity_lvl}")
    prune_M2M100_layers(model, sparsity_lvl)
    result = evaluate_model(model)
    print(f"Sacrebleu Translation accuracy at sparsity level {sparsity_lvl} is: {result}")

Pruning at sparsity level: 0
Sacrebleu Translation accuracy at sparsity level 0 is: 0.300655860762251
Pruning at sparsity level: 0.1
Sacrebleu Translation accuracy at sparsity level 0.1 is: 0.30025310585853904
Pruning at sparsity level: 0.5
Sacrebleu Translation accuracy at sparsity level 0.5 is: 0.1932940031927049
Pruning at sparsity level: 0.9
Sacrebleu Translation accuracy at sparsity level 0.9 is: 0.10203657303305992
Pruning at sparsity level: 0.95
Sacrebleu Translation accuracy at sparsity level 0.95 is: 0.09749010824567156
Pruning at sparsity level: 0.99
Sacrebleu Translation accuracy at sparsity level 0.99 is: 0.09749010824567156
