In [1]:
import os

import torch
import torch.nn.utils.prune as prune
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Read the Hugging Face access token from the environment instead of
# hardcoding it in the notebook. A token that has ever been committed or
# shared should be treated as leaked and revoked.
# When HF_TOKEN is unset, `token` is None and the from_pretrained calls below
# receive token=None.
token = os.environ.get("HF_TOKEN")

# Load the LLaMA model and the tokenizer from the same repository, so the
# tokenizer saved next to the pruned weights matches the model.
model_name = "meta-llama/Llama-3.2-1B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_name, token=token)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)


# Define a function to apply random unstructured pruning to every linear layer
def apply_unstructured_pruning(model, pruning_amount=0.3, make_permanent=False):
    """
    Apply random unstructured pruning to every linear layer of the model.

    Each ``torch.nn.Linear`` found via ``model.named_modules()`` has its
    ``weight`` pruned in place with ``prune.random_unstructured``. The model
    is modified; nothing is returned.

    Args:
        model: The PyTorch model to prune.
        pruning_amount: The proportion of weights to prune (0 to 1). Passed
            through unchanged as ``amount`` to ``prune.random_unstructured``.
        make_permanent: When True, call ``prune.remove`` on each layer right
            after pruning so the mask is folded into a plain ``weight``
            parameter and the ``weight_orig`` / ``weight_mask`` entries are
            dropped. Defaults to False, which keeps the reparametrization
            (the original behavior).
    """
    for name, module in model.named_modules():
        if not isinstance(module, torch.nn.Linear):
            continue
        print(f"Applying pruning to layer: {name}")
        prune.random_unstructured(module, name='weight', amount=pruning_amount)
        if make_permanent:
            # Bake the mask into `weight` so state_dict() exposes the usual
            # `weight` key instead of `weight_orig` + `weight_mask`.
            prune.remove(module, 'weight')


In [2]:
# Apply pruning to the model.
# The pruning masks are drawn at random, so seed the torch RNG first to make
# the set of pruned weights reproducible on a fresh Restart & Run All.
pruning_seed = 42
torch.manual_seed(pruning_seed)

pruning_amount = 0.2
apply_unstructured_pruning(model, pruning_amount)

# Check sparsity of a layer
def check_sparsity(module):
    """Print the percentage of exactly-zero entries in ``module.weight``.

    Does nothing when ``module`` has no ``weight`` attribute.
    """
    if not hasattr(module, 'weight'):
        return

    weight = module.weight
    zero_count = float((weight == 0).sum())
    sparsity = 100.0 * zero_count / weight.numel()
    print(f"Sparsity: {sparsity:.2f}%")

# Example: report sparsity for the first linear layer only, then stop
for name, module in model.named_modules():
    if not isinstance(module, torch.nn.Linear):
        continue
    print(f"Checking sparsity of layer: {name}")
    check_sparsity(module)
    break

Applying pruning to layer: model.layers.0.self_attn.q_proj
Applying pruning to layer: model.layers.0.self_attn.k_proj
Applying pruning to layer: model.layers.0.self_attn.v_proj
Applying pruning to layer: model.layers.0.self_attn.o_proj
Applying pruning to layer: model.layers.0.mlp.gate_proj
Applying pruning to layer: model.layers.0.mlp.up_proj
Applying pruning to layer: model.layers.0.mlp.down_proj
Applying pruning to layer: model.layers.1.self_attn.q_proj
Applying pruning to layer: model.layers.1.self_attn.k_proj
Applying pruning to layer: model.layers.1.self_attn.v_proj
Applying pruning to layer: model.layers.1.self_attn.o_proj
Applying pruning to layer: model.layers.1.mlp.gate_proj
Applying pruning to layer: model.layers.1.mlp.up_proj
Applying pruning to layer: model.layers.1.mlp.down_proj
Applying pruning to layer: model.layers.2.self_attn.q_proj
Applying pruning to layer: model.layers.2.self_attn.k_proj
Applying pruning to layer: model.layers.2.self_attn.v_proj
Applying pruning to

In [3]:
# Make the pruning permanent before saving.
# While a module is still reparametrized by torch.nn.utils.prune, its
# state_dict holds `weight_orig` and `weight_mask` rather than `weight`, so a
# checkpoint written in that state would not reload as a pruned model.
# prune.remove folds the mask into a plain `weight` parameter.
for module in model.modules():
    if hasattr(module, "weight_orig") and hasattr(module, "weight_mask"):
        prune.remove(module, "weight")

# Save the pruned model
model.save_pretrained("./unstructured_prune_model")
tokenizer.save_pretrained("./unstructured_prune_model")

('./unstructured_prune_model\\tokenizer_config.json',
 './unstructured_prune_model\\special_tokens_map.json',
 './unstructured_prune_model\\tokenizer.json')