# Imports

In [1]:
from transformers import RobertaTokenizer, T5ForConditionalGeneration
from Datasets import Api_Dataset, Mbpp_Dataset
import torch
import numpy as np
from Datasets import Test_Dataset
from torch.utils.data import DataLoader
import pandas as pd
from utils import get_module_names, get_module_by_name
import torch.nn.utils.prune as prune
from utils import get_module_by_name, get_module_names, get_bleu_score, fine_tune
import torch.backends.cudnn as cudnn

# Load Model

In [None]:
# Locations of the base model/tokenizer and of the sparse, fine-tuned weights.
model_path = 'final_model'
checkpoint_path = './sparse_models/T5ForConditionalGeneration/exp1/sparse_weights_finetuned.pth'

# Set up device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Your device is {device}.')

# Set up model and tokenizer
model = T5ForConditionalGeneration.from_pretrained(model_path)
tokenizer = RobertaTokenizer.from_pretrained(model_path)
model = model.to(device)

# The previous check `device == 'cuda'` compared a torch.device with a str,
# which is always False, so the model was never wrapped in DataParallel.
# The rest of the notebook depends on the unwrapped model (parameter names
# without a 'module.' prefix, model.save_pretrained), so it stays unwrapped
# here; only the cuDNN autotuner flag is enabled, using device.type.
if device.type == 'cuda':
    cudnn.benchmark = True

# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
checkpoint = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(checkpoint)

Your device is cuda.


  return torch.load(checkpoint_file, map_location="cpu")


# Load Datasets and Data Loaders

## MBPP Dataset

In [3]:
# --- Train split (shuffled) -------------------------------------------------
Mbpp_train_dataset = Mbpp_Dataset(
    'mbpp_train.csv',
    text_length=128,
    code_length=128,
)
mbpp_train_loader = DataLoader(Mbpp_train_dataset, shuffle=True, batch_size=8)

# --- Validation split (fixed order) -----------------------------------------
Mbpp_valid_dataset = Mbpp_Dataset(
    'mbpp_valid.csv',
    text_length=128,
    code_length=128,
)
mbpp_val_loader = DataLoader(Mbpp_valid_dataset, shuffle=False, batch_size=8)

# --- Test split ---------------------------------------------------------------
# The test CSV is wrapped in a Test_Dataset for batching and also read as a
# DataFrame; both are handed to get_bleu_score in the evaluation cells.
mbpp_test_dataset = Test_Dataset(
    data='data/mbpp/mbpp_test.csv',
    task_prefix='Generate code from natural language: (from Mbpp)',
)
mbpp_test_loader = DataLoader(dataset=mbpp_test_dataset, shuffle=False, batch_size=128)
MBPP_Test_DF = pd.read_csv('data/mbpp/mbpp_test.csv')



## Python-API Dataset

In [47]:
# --- Train split (shuffled) -------------------------------------------------
Api_train_dataset = Api_Dataset(
    'api-mined_train.csv',
    text_length=256,
    code_length=64,
)
api_train_loader = DataLoader(Api_train_dataset, shuffle=True, batch_size=16)

# --- Validation split (fixed order) -----------------------------------------
Api_valid_dataset = Api_Dataset(
    'api-mined_valid.csv',
    text_length=256,
    code_length=64,
)
api_val_loader = DataLoader(Api_valid_dataset, shuffle=False, batch_size=16)

# --- Test split ---------------------------------------------------------------
# The test CSV is wrapped in a Test_Dataset for batching and also read as a
# DataFrame; both are handed to get_bleu_score in the evaluation cells.
api_test_dataset = Test_Dataset(
    data='data/pythonapi/test_processing.csv',
    task_prefix='Generate code from natural language: (from PythonAPI)',
)
api_test_loader = DataLoader(dataset=api_test_dataset, shuffle=False, batch_size=256)
API_Test_DF = pd.read_csv('data/pythonapi/test_processing.csv')



# Pruned Model Performance

## MBPP Dataset

In [4]:
# BLEU of the loaded sparse checkpoint on the MBPP test split, measured
# before the fine-tuning cells further down.
mbpp_bleu_pruned = get_bleu_score(model, tokenizer, mbpp_test_loader, MBPP_Test_DF)
print("BLEU score:", mbpp_bleu_pruned)

BLEU score: 0.13894053612378582


## Python-API Dataset

In [6]:
# BLEU of the loaded sparse checkpoint on the Python-API test split, measured
# before the fine-tuning cells further down.
api_bleu_pruned = get_bleu_score(model, tokenizer, api_test_loader, API_Test_DF)
print("BLEU Score:", api_bleu_pruned)

BLEU Score: 0.2281461434656612


In [6]:
# Names discovered by get_module_names (presumably appended to the list in
# place; confirm against utils). Previously this result was stored in
# `module_names` and overwritten on the very next statement, so it is kept
# under its own name for inspection instead.
all_module_names = []
get_module_names(model, '', all_module_names)

# Hand-picked modules that the following cells analyse and prune: every
# sub-module of the first encoder block.
module_names = [
    'encoder.block.0.layer.0.SelfAttention.q',
    'encoder.block.0.layer.0.SelfAttention.k',
    'encoder.block.0.layer.0.SelfAttention.v',
    'encoder.block.0.layer.0.SelfAttention.o',
    'encoder.block.0.layer.0.SelfAttention.relative_attention_bias',
    'encoder.block.0.layer.0.layer_norm',
    'encoder.block.0.layer.1.DenseReluDense.wi',
    'encoder.block.0.layer.1.DenseReluDense.wo',
    'encoder.block.0.layer.1.layer_norm',
]
module_names

['encoder.block.0.layer.0.SelfAttention.q',
 'encoder.block.0.layer.0.SelfAttention.k',
 'encoder.block.0.layer.0.SelfAttention.v',
 'encoder.block.0.layer.0.SelfAttention.o',
 'encoder.block.0.layer.0.SelfAttention.relative_attention_bias',
 'encoder.block.0.layer.0.layer_norm',
 'encoder.block.0.layer.1.DenseReluDense.wi',
 'encoder.block.0.layer.1.DenseReluDense.wo',
 'encoder.block.0.layer.1.layer_norm']

In [7]:
# Collect (module, 'weight') pairs in the format expected by torch.nn.utils.prune,
# plus (module_name, weight element count) pairs for reporting.
parameters_to_prune = []
pruneable_module_names = []
for module_name in module_names:
    # Token embeddings are never pruned.
    if 'embed_tokens' in module_name:
        continue
    module = get_module_by_name(model, module_name)
    # Skip modules without a weight explicitly rather than hiding every
    # possible error behind a bare `except: pass`.
    weight = getattr(module, 'weight', None)
    if weight is None:
        continue
    parameters_to_prune.append((module, 'weight'))
    pruneable_module_names.append((module_name, weight.numel()))
parameters_to_prune[:10]

[(Linear(in_features=768, out_features=768, bias=False), 'weight'),
 (Linear(in_features=768, out_features=768, bias=False), 'weight'),
 (Linear(in_features=768, out_features=768, bias=False), 'weight'),
 (Linear(in_features=768, out_features=768, bias=False), 'weight'),
 (Embedding(32, 12), 'weight'),
 (T5LayerNorm(), 'weight'),
 (Linear(in_features=768, out_features=3072, bias=False), 'weight'),
 (Linear(in_features=3072, out_features=768, bias=False), 'weight'),
 (T5LayerNorm(), 'weight')]

In [10]:
# Per-layer and overall sparsity (fraction of exactly-zero entries) of the
# trainable `weight` parameters belonging to the modules in `module_names`.
count = 0
net_size = 0
pairs = []
sparsity_levels = []
sparse_parameters = 0
total_parameters = 0

for name, param in model.named_parameters():
    # Split "<module path>.<parameter name>" on the last dot instead of
    # slicing off a fixed 7 characters, which only works for ".weight".
    module_name, _, param_name = name.rpartition('.')
    if not (param.requires_grad and param_name == 'weight' and module_name in module_names):
        continue

    print(module_name, param.size())
    n_total = param.numel()
    # Count zeros once, on the tensor's own device (no CPU/numpy round trip).
    n_zero = n_total - int(torch.count_nonzero(param.detach()))

    count += 1
    net_size += n_total
    pairs.append((name, n_total))
    sparsity_levels.append(n_zero / n_total if n_total else 0.0)
    sparse_parameters += n_zero
    total_parameters += n_total

# If nothing matched (e.g. the parameters have been renamed by an active
# pruning mask), report NaN instead of raising ZeroDivisionError.
net_sparsity = sparse_parameters / total_parameters if total_parameters else float('nan')

print(f"Total number of layers: {count}")
print(f"Total number of parameters: {net_size}")
print(f"Sparsity Levels: {sparsity_levels}")
print(f"Sparse Parameters: {sparse_parameters}")
print(f"Total Parameters: {total_parameters}")
print(f"Net Sparsity: {net_sparsity}")

encoder.block.0.layer.0.SelfAttention.q torch.Size([768, 768])
encoder.block.0.layer.0.SelfAttention.k torch.Size([768, 768])
encoder.block.0.layer.0.SelfAttention.v torch.Size([768, 768])
encoder.block.0.layer.0.SelfAttention.o torch.Size([768, 768])
encoder.block.0.layer.0.SelfAttention.relative_attention_bias torch.Size([32, 12])
encoder.block.0.layer.0.layer_norm torch.Size([768])
encoder.block.0.layer.1.DenseReluDense.wi torch.Size([3072, 768])
encoder.block.0.layer.1.DenseReluDense.wo torch.Size([768, 3072])
encoder.block.0.layer.1.layer_norm torch.Size([768])
Total number of layers: 9
Total number of parameters: 7079808
Sparsity Levels: [0.2000003390842014, 0.2000003390842014, 0.2000003390842014, 0.2000003390842014, 0.20052083333333334, 0.20052083333333334, 0.19999991522894967, 0.19999991522894967, 0.20052083333333334]
Sparse Parameters: 1415963
Total Parameters: 7079808
Net Sparsity: 0.2000001977454756


# Fine-tuning

## Apply the pruning masks

In [12]:
# Fraction of each layer's weights to mask. SPARSITY was not defined in any
# cell of this notebook, so a fresh kernel raised NameError here. 0.2 matches
# the ~20% per-layer sparsity measured above and the 'magnitude_pruning_20'
# output directory. NOTE(review): confirm this is the intended value.
SPARSITY = 0.2

# Masks are applied layer by layer (L1 magnitude) rather than with
# prune.global_unstructured, so every listed module gets the same fraction
# of masked weights.
for module, name in parameters_to_prune:
    prune.l1_unstructured(module, name=name, amount=SPARSITY)

## Python-API Dataset

In [16]:
# Fine-tune on the Python-API data and report test BLEU after each round.
# The loop variable used to be called `iter`, which replaced the builtin
# iter() in the notebook namespace for every later cell.
api_rounds = 1
for iteration in range(1, api_rounds + 1):
    print(f"Iteration {iteration}:")
    fine_tune(model, api_train_loader, api_val_loader, epochs=1, learning_rate=1e-6)
    print(f"BLEU Score after {iteration} iterations:", get_bleu_score(model, tokenizer, api_test_loader, API_Test_DF))

Iteration 1:
Epoch 1/1:
Train: batch: 10/750, Average loss:0.4205333173274994, Current loss:0.40900719165802
Train: batch: 20/750, Average loss:0.4155389741063118, Current loss:0.32355183362960815
Train: batch: 30/750, Average loss:0.40862095753351846, Current loss:0.31301403045654297
Train: batch: 40/750, Average loss:0.41205747947096827, Current loss:0.6436066627502441
Train: batch: 50/750, Average loss:0.405461967587471, Current loss:0.3688463866710663
Train: batch: 60/750, Average loss:0.4102784335613251, Current loss:0.3638272285461426
Train: batch: 70/750, Average loss:0.40467696615627835, Current loss:0.5133880972862244
Train: batch: 80/750, Average loss:0.3950720202177763, Current loss:0.3146379888057709
Train: batch: 90/750, Average loss:0.3852678866850005, Current loss:0.28135284781455994
Train: batch: 100/750, Average loss:0.38091221541166304, Current loss:0.22692283987998962
Train: batch: 110/750, Average loss:0.37284686470573597, Current loss:0.33059027791023254
Train: bat

## MBPP Dataset

In [13]:
# Fine-tune on the MBPP data and report test BLEU after each round.
# The loop variable used to be called `iter`, which replaced the builtin
# iter() in the notebook namespace for every later cell.
mbpp_rounds = 3
for iteration in range(1, mbpp_rounds + 1):
    print(f"Iteration {iteration}:")
    fine_tune(model, mbpp_train_loader, mbpp_val_loader, epochs=1, learning_rate=1e-6)
    print(f"BLEU score after {iteration} iterations:", get_bleu_score(model, tokenizer, mbpp_test_loader, MBPP_Test_DF))

Iteration 1:
Epoch 1/1:
Train: batch: 10/98, Average loss:0.055892310477793215, Current loss:0.016848690807819366
Train: batch: 20/98, Average loss:0.05721310395747423, Current loss:0.07930701225996017
Train: batch: 30/98, Average loss:0.06504064599672953, Current loss:0.02265237271785736
Train: batch: 40/98, Average loss:0.06531971795484423, Current loss:0.03177298977971077
Train: batch: 50/98, Average loss:0.06365624591708183, Current loss:0.07167372852563858
Train: batch: 60/98, Average loss:0.06758188474923373, Current loss:0.05032338947057724
Train: batch: 70/98, Average loss:0.06796416826546192, Current loss:0.12004649639129639
Train: batch: 80/98, Average loss:0.06716325941961258, Current loss:0.037740327417850494
Train: batch: 90/98, Average loss:0.06781859871827894, Current loss:0.08000339567661285
Train: batch: 98/98, Average loss:0.06636714784200398, Current loss:0.02468978427350521
Train Loss: 0.06636714784200398
Valid: batch: 10/13, Average loss:0.03492530600633472, Curren

## Remove the masks (make them permanent)

In [14]:
# Make the masks permanent on each pruned module. The parameter name comes
# from the same (module, name) pair used when the mask was applied, so the
# two cells cannot drift apart. Modules that are no longer pruned are
# skipped, so re-running this cell does not raise.
for module, name in parameters_to_prune:
    if prune.is_pruned(module):
        prune.remove(module, name)

# Performance after Fine-tuning

## MBPP Dataset

In [15]:
# BLEU on the MBPP test split after fine-tuning and mask removal.
mbpp_bleu_finetuned = get_bleu_score(model, tokenizer, mbpp_test_loader, MBPP_Test_DF)
print("BLEU score:", mbpp_bleu_finetuned)

BLEU score: 0.13483448974190115


## Python-API Dataset

In [20]:
# BLEU on the Python-API test split after fine-tuning and mask removal.
api_bleu_finetuned = get_bleu_score(model, tokenizer, api_test_loader, API_Test_DF)
print("BLEU Score:", api_bleu_finetuned)

BLEU Score: 0.2917530479398626


# Save Model

In [21]:
# Write the fine-tuned sparse model and its tokenizer to the same directory.
output_dir = 'model/magnitude_pruning_20'
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

('model/magnitude_pruning_20/tokenizer_config.json',
 'model/magnitude_pruning_20/special_tokens_map.json',
 'model/magnitude_pruning_20/vocab.json',
 'model/magnitude_pruning_20/merges.txt',
 'model/magnitude_pruning_20/added_tokens.json')