# Imports

In [None]:
from transformers import RobertaTokenizer, T5ForConditionalGeneration
from Datasets import Api_Dataset, Mbpp_Dataset
import torch
import numpy as np
from Datasets import Test_Dataset
from torch.utils.data import DataLoader
import pandas as pd
from utils import get_module_names, get_module_by_name
import torch.nn.utils.prune as prune
from utils import get_module_by_name, get_module_names, get_bleu_score, fine_tune
import torch.backends.cudnn as cudnn

# Load Model

In [None]:
# Load the fine-tuned checkpoint and its tokenizer, then move the model to the
# best available device.
model_path = 'final_model'
# Set up device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Your device is {device}.')
# Set up model and tokenizer
model = T5ForConditionalGeneration.from_pretrained(model_path)
tokenizer = RobertaTokenizer.from_pretrained(model_path)
model = model.to(device)
# `device` is a torch.device, not a str: comparing it to 'cuda' with `==` is
# never true, so the CUDA-only setup silently never ran. Compare the device
# type instead.
if device.type == 'cuda':
    # The model is intentionally NOT wrapped in torch.nn.DataParallel: the
    # cells below address sub-modules by their plain names (e.g. 'shared')
    # and call model.save_pretrained(), neither of which works on the wrapper.
    cudnn.benchmark = True

Your device is cuda.


  return torch.load(checkpoint_file, map_location="cpu")


# Load Datasets and Data Loaders

## MBPP Dataset

In [3]:
# MBPP fine-tuning splits: both are tokenised to 128 text / 128 code tokens.
mbpp_lengths = dict(text_length=128, code_length=128)
Mbpp_train_dataset = Mbpp_Dataset('mbpp_train.csv', **mbpp_lengths)
Mbpp_valid_dataset = Mbpp_Dataset('mbpp_valid.csv', **mbpp_lengths)

# Only the training loader shuffles; validation keeps the file order.
mbpp_train_loader = DataLoader(dataset=Mbpp_train_dataset, batch_size=8, shuffle=True)
mbpp_val_loader = DataLoader(dataset=Mbpp_valid_dataset, batch_size=8, shuffle=False)

# MBPP evaluation split: the same CSV feeds the generation loader and the
# DataFrame handed to get_bleu_score.
mbpp_test_csv = 'data/mbpp/mbpp_test.csv'
mbpp_test_dataset = Test_Dataset(
    data=mbpp_test_csv,
    task_prefix='Generate code from natural language: (from Mbpp)',
)
mbpp_test_loader = DataLoader(dataset=mbpp_test_dataset, batch_size=128, shuffle=False)
MBPP_Test_DF = pd.read_csv(mbpp_test_csv)



## Python-API Dataset

In [4]:
# Python-API fine-tuning splits: both are tokenised to 256 text / 64 code tokens.
api_lengths = dict(text_length=256, code_length=64)
Api_train_dataset = Api_Dataset('api-mined_train.csv', **api_lengths)
Api_valid_dataset = Api_Dataset('api-mined_valid.csv', **api_lengths)

# Only the training loader shuffles; validation keeps the file order.
api_train_loader = DataLoader(dataset=Api_train_dataset, batch_size=16, shuffle=True)
api_val_loader = DataLoader(dataset=Api_valid_dataset, batch_size=16, shuffle=False)

# Python-API evaluation split: the same CSV feeds the generation loader and
# the DataFrame handed to get_bleu_score.
api_test_csv = 'data/pythonapi/test_processing.csv'
api_test_dataset = Test_Dataset(
    data=api_test_csv,
    task_prefix='Generate code from natural language: (from PythonAPI)',
)
api_test_loader = DataLoader(dataset=api_test_dataset, batch_size=256, shuffle=False)
API_Test_DF = pd.read_csv(api_test_csv)



# Original Model Performance

## MBPP Dataset

In [5]:
# BLEU of the model as loaded, on the MBPP test split.
baseline_mbpp_bleu = get_bleu_score(model, tokenizer, mbpp_test_loader, MBPP_Test_DF)
print("BLEU score:", baseline_mbpp_bleu)

BLEU score: 0.13894053612378582


## Python-API Dataset

In [6]:
# BLEU of the model as loaded, on the Python-API test split.
baseline_api_bleu = get_bleu_score(model, tokenizer, api_test_loader, API_Test_DF)
print("BLEU Score:", baseline_api_bleu)

BLEU Score: 0.2281461434656612


# Pruning

In [7]:
# Pruning amount handed to prune.global_unstructured(amount=...) below; as a
# float it is the fraction of all candidate weights to zero out.
SPARSITY = 0.2

In [8]:
# Collect the dotted names of the model's sub-modules.
# get_module_names fills `module_names` in place (it is passed in empty and
# displayed populated below); the '' argument is presumably the starting
# name prefix -- TODO confirm against utils.get_module_names.
module_names = []
get_module_names(model, '', module_names)
# Preview the first ten names.
module_names[:10]

['shared',
 'encoder.embed_tokens',
 'encoder.block.0.layer.0.SelfAttention.q',
 'encoder.block.0.layer.0.SelfAttention.k',
 'encoder.block.0.layer.0.SelfAttention.v',
 'encoder.block.0.layer.0.SelfAttention.o',
 'encoder.block.0.layer.0.SelfAttention.relative_attention_bias',
 'encoder.block.0.layer.0.layer_norm',
 'encoder.block.0.layer.0.dropout',
 'encoder.block.0.layer.1.DenseReluDense.wi']

In [9]:
# Build the list of pruning candidates: every named module that owns a
# `weight` tensor, except the `embed_tokens` entries.
parameters_to_prune = []     # (module, 'weight') pairs in the form prune.global_unstructured expects
pruneable_module_names = []  # (module name, number of weight elements), kept for inspection
for module_name in module_names:
    # NOTE(review): embed_tokens is skipped while 'shared' is kept --
    # presumably because these layers reuse the shared embedding; confirm.
    if 'embed_tokens' in module_name:
        continue
    module = get_module_by_name(model, module_name)
    # Explicit check instead of a bare `except: pass`, so modules without a
    # weight (e.g. dropout, containers) are skipped without also hiding
    # unrelated errors.
    weight = getattr(module, 'weight', None)
    if weight is None:
        continue
    parameters_to_prune.append((module, 'weight'))
    pruneable_module_names.append((module_name, np.prod(weight.shape)))
# Only the last expression of a cell is displayed, so a preview of
# parameters_to_prune placed before this line would be discarded.
pruneable_module_names[:10]

[('shared', 24652800),
 ('encoder.block.0.layer.0.SelfAttention.q', 589824),
 ('encoder.block.0.layer.0.SelfAttention.k', 589824),
 ('encoder.block.0.layer.0.SelfAttention.v', 589824),
 ('encoder.block.0.layer.0.SelfAttention.o', 589824),
 ('encoder.block.0.layer.0.SelfAttention.relative_attention_bias', 384),
 ('encoder.block.0.layer.0.layer_norm', 768),
 ('encoder.block.0.layer.1.DenseReluDense.wi', 2359296),
 ('encoder.block.0.layer.1.DenseReluDense.wo', 2359296),
 ('encoder.block.0.layer.1.layer_norm', 768)]

In [10]:
# Global L1 (magnitude) pruning: weights from all candidate modules are ranked
# together and the smallest-magnitude SPARSITY fraction is masked out.
prune.global_unstructured(
    parameters=parameters_to_prune,
    amount=SPARSITY,
    pruning_method=prune.L1Unstructured,
)

In [11]:
# Drop the pruning re-parametrization from each module; the pruned `weight`
# values stay in place as an ordinary parameter.
for pruned_module, param_name in parameters_to_prune:
    prune.remove(pruned_module, param_name)

In [12]:
# Measure sparsity (fraction of exactly-zero entries) for every trainable
# parameter tensor, plus the totals over the whole model.
count = 0
net_size = 0
pairs = []
sparsity_levels = []
sparse_parameters = 0
total_parameters = 0

for param_name, tensor in model.named_parameters():
    # Frozen parameters are left out of the report.
    if not tensor.requires_grad:
        continue

    n_elements = np.prod(tensor.size())
    count += 1
    net_size += n_elements
    pairs.append((param_name, n_elements))

    # Copy to host memory as a NumPy array so zeros can be counted.
    values = np.array(tensor.to('cpu').detach())
    n_zeros = values.size - np.count_nonzero(values)
    sparsity_levels.append(n_zeros / values.size)
    sparse_parameters += n_zeros
    total_parameters += values.size

print(f"Total number of layers: {count}")
print(f"Total number of parameters: {net_size}")
print(f"Sparsity Levels: {sparsity_levels}")
print(f"Sparse Parameters: {sparse_parameters}")
print(f"Total Parameters: {total_parameters}")
print(f"Net Sparsity: {sparse_parameters/total_parameters}")

Total number of layers: 257
Total number of parameters: 222882048
Sparsity Levels: [0.09523421274662514, 0.1797146267361111, 0.17404344346788195, 0.2872229682074653, 0.2921057807074653, 0.041666666666666664, 0.059895833333333336, 0.23017798529730904, 0.24158520168728298, 0.0013020833333333333, 0.20892164442274305, 0.20511372884114584, 0.2707909478081597, 0.2691107855902778, 0.005208333333333333, 0.2257470024956597, 0.23588773939344618, 0.0, 0.2109341091579861, 0.20858595106336805, 0.2620188395182292, 0.2591264512803819, 0.0078125, 0.22086588541666666, 0.23395750257703993, 0.0, 0.21506754557291666, 0.2120903862847222, 0.2537655300564236, 0.2508816189236111, 0.0078125, 0.21743392944335938, 0.23118125067816842, 0.0, 0.2192043728298611, 0.2173614501953125, 0.24500528971354166, 0.2425452338324653, 0.0078125, 0.21404563056098092, 0.23043526543511283, 0.0, 0.21863301595052084, 0.21638319227430555, 0.24059549967447916, 0.2373267279730903, 0.0078125, 0.21276685926649305, 0.2283994886610243, 0.0

# Performance after Magnitude-based Pruning

## MBPP Dataset

In [13]:
# BLEU on the MBPP test split, evaluated before any fine-tuning.
pruned_mbpp_bleu = get_bleu_score(model, tokenizer, mbpp_test_loader, MBPP_Test_DF)
print("BLEU score:", pruned_mbpp_bleu)

BLEU score: 0.10602251935838682


## Python-API Dataset

In [14]:
# BLEU on the Python-API test split, evaluated before any fine-tuning.
pruned_api_bleu = get_bleu_score(model, tokenizer, api_test_loader, API_Test_DF)
print("BLEU Score:", pruned_api_bleu)

BLEU Score: 0.1727099428634061


# Fine-tuning

## Re-apply the pruning masks (so pruned weights stay zero during fine-tuning)

In [15]:
# Put the pruning masks back on the candidate modules before fine-tuning,
# using the same global L1 criterion and amount.
prune.global_unstructured(
    parameters=parameters_to_prune,
    amount=SPARSITY,
    pruning_method=prune.L1Unstructured,
)

## Python-API Dataset

In [16]:
# Fine-tune on the Python-API data, reporting test BLEU after every round.
# The loop variable is named `iteration` rather than `iter` so the builtin
# iter() is not shadowed in the notebook namespace for later cells.
for iteration in range(1):
    print(f"Iteration {iteration+1}:")
    fine_tune(model, api_train_loader, api_val_loader, epochs=1, learning_rate=1e-6)
    print(f"BLEU Score after {(iteration+1)} iterations:", get_bleu_score(model, tokenizer, api_test_loader, API_Test_DF))

Iteration 1:
Epoch 1/1:
Train: batch: 10/750, Average loss:0.4205333173274994, Current loss:0.40900719165802
Train: batch: 20/750, Average loss:0.4155389741063118, Current loss:0.32355183362960815
Train: batch: 30/750, Average loss:0.40862095753351846, Current loss:0.31301403045654297
Train: batch: 40/750, Average loss:0.41205747947096827, Current loss:0.6436066627502441
Train: batch: 50/750, Average loss:0.405461967587471, Current loss:0.3688463866710663
Train: batch: 60/750, Average loss:0.4102784335613251, Current loss:0.3638272285461426
Train: batch: 70/750, Average loss:0.40467696615627835, Current loss:0.5133880972862244
Train: batch: 80/750, Average loss:0.3950720202177763, Current loss:0.3146379888057709
Train: batch: 90/750, Average loss:0.3852678866850005, Current loss:0.28135284781455994
Train: batch: 100/750, Average loss:0.38091221541166304, Current loss:0.22692283987998962
Train: batch: 110/750, Average loss:0.37284686470573597, Current loss:0.33059027791023254
Train: bat

## MBPP Dataset

In [17]:
# Fine-tune on the MBPP data, reporting test BLEU after every round.
# The loop variable is named `iteration` rather than `iter` so the builtin
# iter() is not shadowed in the notebook namespace for later cells.
for iteration in range(2):
    print(f"Iteration {iteration+1}:")
    fine_tune(model, mbpp_train_loader, mbpp_val_loader, epochs=1, learning_rate=1e-6)
    print(f"BLEU score after {(iteration+1)} iterations:", get_bleu_score(model, tokenizer, mbpp_test_loader, MBPP_Test_DF))

Iteration 1:
Epoch 1/1:
Train: batch: 10/98, Average loss:0.10090035013854504, Current loss:0.05275237187743187
Train: batch: 20/98, Average loss:0.0918101727962494, Current loss:0.08922037482261658
Train: batch: 30/98, Average loss:0.08622863839070002, Current loss:0.04046088084578514
Train: batch: 40/98, Average loss:0.0826756063848734, Current loss:0.025000154972076416
Train: batch: 50/98, Average loss:0.08427223935723305, Current loss:0.11450810730457306
Train: batch: 60/98, Average loss:0.08118659012640515, Current loss:0.06415283679962158
Train: batch: 70/98, Average loss:0.08036466078566654, Current loss:0.04563227668404579
Train: batch: 80/98, Average loss:0.08089310105424374, Current loss:0.0637287124991417
Train: batch: 90/98, Average loss:0.08174063333620628, Current loss:0.11672080308198929
Train: batch: 98/98, Average loss:0.08078702487887776, Current loss:0.04552919417619705
Train Loss: 0.08078702487887776
Valid: batch: 10/13, Average loss:0.039674695767462256, Current Lo

## Remove the pruning re-parametrization (make the pruned weights permanent)

In [18]:
# Fold each mask into its module's `weight` and drop the pruning
# re-parametrization, leaving plain parameters that can be saved normally.
for pruned_module, param_name in parameters_to_prune:
    prune.remove(pruned_module, param_name)

# Performance after Fine-tuning

## MBPP Dataset

In [19]:
# Final BLEU on the MBPP test split.
finetuned_mbpp_bleu = get_bleu_score(model, tokenizer, mbpp_test_loader, MBPP_Test_DF)
print("BLEU score:", finetuned_mbpp_bleu)

BLEU score: 0.12855320093463174


## Python-API Dataset

In [20]:
# Final BLEU on the Python-API test split.
finetuned_api_bleu = get_bleu_score(model, tokenizer, api_test_loader, API_Test_DF)
print("BLEU Score:", finetuned_api_bleu)

BLEU Score: 0.2917530479398626


# Save Model

In [21]:
# Save the pruned model and its tokenizer side by side.
# The directory suffix is derived from SPARSITY (0.2 -> "20") so the saved
# artefact's name cannot drift from the pruning amount actually used.
save_dir = f'model/magnitude_pruning_{round(SPARSITY * 100)}'
model.save_pretrained(save_dir)
# Last expression of the cell: displays the tokenizer files that were written.
tokenizer.save_pretrained(save_dir)

('model/magnitude_pruning_20/tokenizer_config.json',
 'model/magnitude_pruning_20/special_tokens_map.json',
 'model/magnitude_pruning_20/vocab.json',
 'model/magnitude_pruning_20/merges.txt',
 'model/magnitude_pruning_20/added_tokens.json')