# Iterative Pruning Debug Notebook

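This notebook walks through the iterative pruning approach one step at a time — remove a layer, attach LoRA, fine-tune, re-evaluate — so that each stage can be inspected while debugging.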

In [1]:
import os

# Device selection
# Both environment variables are assigned BEFORE `import torch` below; keep that order so
# the restriction is already in place when the CUDA runtime is first initialized.
os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
# NOTE: `i` is a notebook-global name; the `for i in range(...)` loop in the
# Results Analysis cell reassigns it, so do not rely on it after that cell has run.
i = 2  # device number to use
os.environ["CUDA_VISIBLE_DEVICES"] = f'{i}'

import torch
import sys
# Relative entry: resolved against the kernel's working directory, so the `src` package
# is only importable when the notebook is started from its own folder.
sys.path.append('../..')

from src.pruninghealing import Trainer, DatasetLoader, IterativePruner
from src.pruninghealing.utils import load_model_and_tokenizer, calculate_perplexity, get_model_layers
from src.pruninghealing.logger import Logger

# Index 0 is queried (not `i`): with a single entry in CUDA_VISIBLE_DEVICES the selected
# GPU is presumably the only one visible to this process and is enumerated as device 0.
print(f'Using GPU device {i}: {torch.cuda.get_device_name(0)}' if torch.cuda.is_available() else 'Using CPU')

Using GPU device 2: NVIDIA A100-SXM4-80GB


## Load Model and Dataset

In [None]:
# Configuration
# All paths are relative to the kernel's working directory.
MODEL_PATH = "../checkpoints/llama3.1-8b"  # Change to your model
# Passed to IterativePruner, Trainer and Logger in the "Initialize Components" cell.
WORKSPACE = "../../workspace/iterative_debug"
# NOTE(review): not referenced by any later cell in this notebook — the three steps
# are written out by hand (step = 0, 1, 2).
NUM_LAYERS_TO_PRUNE = 3
# Index passed to the pruner at step 0; later steps use START_LAYER + step.
START_LAYER = 17

# Load model and tokenizer
print("Loading model...")
model, tokenizer = load_model_and_tokenizer(MODEL_PATH)

# get_model_layers(model) is printed as the layer count of the loaded model.
print(f"Model loaded: {get_model_layers(model)} layers")
print(f"Model type: {model.config.model_type}")

Loading model...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# Load dataset
# The loader is bound to the tokenizer loaded above; after load_wikitext() it exposes
# `train_dataset` and `eval_dataset`, whose lengths are reported below.
# NOTE(review): the saved output of this cell reports "1000 train, 100 eval", which does
# not match train_size=500 / eval_size=50 — either the output is stale or the sizes are
# interpreted differently inside load_wikitext; confirm.
print("Loading dataset...")
dataset_loader = DatasetLoader(tokenizer)
dataset_loader.load_wikitext(train_size=500, eval_size=50)  # Small for debugging

print(f"Dataset loaded: {len(dataset_loader.train_dataset)} train, {len(dataset_loader.eval_dataset)} eval")

Loading dataset...
Dataset loaded: 1000 train, 100 eval


## Calculate Baseline Perplexity

In [None]:
# Calculate baseline perplexity
# Reference value for the unpruned model; later cells report changes relative to it and
# the final plot uses it as the starting point.
# The same max_samples=20 is used for every perplexity measurement in this notebook.
# NOTE(review): no dataset is passed here, so the evaluation text is presumably chosen
# inside calculate_perplexity — confirm it is the intended eval split.
print("Calculating baseline perplexity...")
baseline_ppl = calculate_perplexity(model, tokenizer, max_samples=20)
print(f"Baseline perplexity: {baseline_ppl:.3f}")

Calculating baseline perplexity...
Baseline perplexity: 10.907


## Initialize Components

In [None]:
# Initialize pruner, trainer, and logger
# All three share the same WORKSPACE directory. The trainer is constructed with the
# unpruned `model`; later cells re-point `trainer.model` at the pruned model before training.
pruner = IterativePruner(model, tokenizer, WORKSPACE)
trainer = Trainer(model, tokenizer, WORKSPACE)
logger = Logger(WORKSPACE)

# Log baseline
# NOTE(review): this entry stores the value under "perplexity", whereas the per-step
# entries logged later use the key "ppl"; the Results Analysis cell only reads "ppl"
# and takes the baseline from the `baseline_ppl` variable instead.
logger.log_step({
    "step": 0,
    "action": "baseline",
    "layers_remaining": get_model_layers(model),
    "perplexity": baseline_ppl
})

print("Components initialized")

Components initialized


## Test Question Setup

In [None]:
# Fixed test question for quality evaluation
TEST_PROMPT = "What is the capital of France?"

def test_model_quality(model, tokenizer, prompt=TEST_PROMPT):
    """Test model response quality"""
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=20,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Test baseline
# Reference generation from the unpruned model; later cells print the response to the
# same prompt after each prune / LoRA / train stage for side-by-side comparison.
print(f"Test prompt: {TEST_PROMPT}")
baseline_response = test_model_quality(model, tokenizer)
print(f"Baseline response: {baseline_response}")

Test prompt: What is the capital of France?
Baseline response: What is the capital of France? Paris
What is the capital of France?
The capital of France is Paris. Paris is the largest


## Step 1: Remove First Layer

In [None]:
# Initialize current model and step
# `current_model` is an alias of `model` (plain assignment, no copy); from here on it is
# rebound to whatever the pruner / trainer return.
current_model = model
step = 0
# Layer index handed to the pruner for this step (START_LAYER for step 0).
layer_idx = START_LAYER + step

print(f"=== Step {step+1}: Removing layer {layer_idx} ===")

# Remove layer
# Uses the pruner's underscore-prefixed helper directly so the step can be inspected in isolation.
# NOTE(review): whether `_remove_layer` modifies the passed model in place (and therefore
# also `model`) is not visible from this notebook — confirm before reusing `model`.
current_model = pruner._remove_layer(current_model, layer_idx)
layers_remaining = get_model_layers(current_model)

print(f"Layers remaining: {layers_remaining}")

=== Step 1: Removing layer 17 ===
Layers remaining: 31


In [None]:
# Test after pruning
# Measure the damage caused by the removal before any healing: perplexity (same
# max_samples=20 as the baseline) plus the response to the fixed test prompt.
ppl_after_prune = calculate_perplexity(current_model, tokenizer, max_samples=20)
response_after_prune = test_model_quality(current_model, tokenizer)

# The "change" is reported relative to the unpruned baseline (signed, positive = worse).
print(f"Perplexity after pruning: {ppl_after_prune:.3f} (change: {ppl_after_prune - baseline_ppl:+.3f})")
print(f"Response after pruning: {response_after_prune}")

Perplexity after pruning: 12.589 (change: +1.682)
Response after pruning: What is the capital of France? Paris
What is the capital of France?
The capital of France is Paris. Paris is a city


In [None]:
# Apply LoRA
# Uses the pruner's underscore-prefixed helper with the same layer index that was just removed;
# the next cell shows the returned object is a peft PeftModelForCausalLM.
print(f"Applying LoRA to layer {layer_idx}...")
current_model = pruner._apply_lora(current_model, layer_idx)

# Test after LoRA
# Sanity check before training: in the saved run the perplexity here equals the
# post-pruning value, i.e. attaching the untrained adapters did not change the output.
ppl_before_train = calculate_perplexity(current_model, tokenizer, max_samples=20)
response_before_train = test_model_quality(current_model, tokenizer)

print(f"Perplexity after LoRA: {ppl_before_train:.3f}")
print(f"Response after LoRA: {response_before_train}")

Applying LoRA to layer 17...
Perplexity after LoRA: 12.589
Response after LoRA: What is the capital of France? Paris
What is the capital of France?
The capital of France is Paris. Paris is a city


In [None]:
# Debug probe: display the class of the model returned by `_apply_lora`
# (saved output: peft.peft_model.PeftModelForCausalLM).
type(current_model)

peft.peft_model.PeftModelForCausalLM

In [None]:
# Train model
# The trainer was constructed with the unpruned model, so it is re-pointed at the
# pruned + LoRA model before training; train() returns the model to continue with.
# NOTE(review): the saved training-loss table shows a higher value at step 50 (10.4576)
# than at step 0 (5.800822) even though perplexity improves afterwards — check how the
# project Trainer reports/accumulates the logged loss.
print("Training model...")
trainer.model = current_model
current_model = trainer.train(dataset_loader, max_steps=50)

# Test after training
ppl_after_train = calculate_perplexity(current_model, tokenizer, max_samples=20)
response_after_train = test_model_quality(current_model, tokenizer)

# "improvement" = perplexity right after attaching LoRA minus perplexity after training
# (signed, positive = training lowered perplexity).
print(f"Perplexity after training: {ppl_after_train:.3f} (improvement: {ppl_before_train - ppl_after_train:+.3f})")
print(f"Response after training: {response_after_train}")

  super().__init__(*args, **kwargs)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training model...


Step,Training Loss
0,5.800822
50,10.4576


Perplexity after training: 11.443 (improvement: +1.146)
Response after training: What is the capital of France? Paris
What is the capital of France?
The capital of France is Paris. Paris is a city


In [None]:
# Log step 1: one "prune" entry and one "train" entry, in that order, sharing the
# 1-based step number and the layer index; only the action name and perplexity differ.
for action_name, ppl_value in (("prune", ppl_after_prune), ("train", ppl_after_train)):
    logger.log_step({"action": action_name, "step": step + 1, "layer": layer_idx, "ppl": ppl_value})

print(f"Step {step+1} completed!")

Step 1 completed!


## Step 2: Remove Second Layer

In [13]:
# Step 2
step = 1
# NOTE(review): the index is START_LAYER + step, but one layer has already been removed
# from `current_model`. If `_remove_layer` indexes the current (already shortened) layer
# list, this removes what was originally layer START_LAYER + 2, not START_LAYER + 1 — confirm.
layer_idx = START_LAYER + step

print(f"=== Step {step+1}: Removing layer {layer_idx} ===")

# Remove layer
# `current_model` is the model returned by the step 1 training call.
current_model = pruner._remove_layer(current_model, layer_idx)
layers_remaining = get_model_layers(current_model)

print(f"Layers remaining: {layers_remaining}")

=== Step 2: Removing layer 18 ===
Layers remaining: 30


In [14]:
# Test after pruning
# Same measurements as in step 1; this overwrites the step 1 values of
# `ppl_after_prune` / `response_after_prune` (those were already logged).
ppl_after_prune = calculate_perplexity(current_model, tokenizer, max_samples=20)
response_after_prune = test_model_quality(current_model, tokenizer)

print(f"Perplexity after pruning: {ppl_after_prune:.3f}")
print(f"Response after pruning: {response_after_prune}")

Perplexity after pruning: 12.878
Response after pruning: What is the capital of France? Paris
What is the capital of Australia? Canberra
What is the capital of China? Beijing



In [15]:
# Apply LoRA and train
# Condensed version of the step 1 cells: attach LoRA, measure, train, measure, log.
# NOTE(review): `current_model` was already a PEFT-wrapped model after step 1; how
# `_apply_lora` treats an already-wrapped model is not visible here — confirm.
current_model = pruner._apply_lora(current_model, layer_idx)
# Computed for parity with step 1 but neither printed nor logged in this cell.
ppl_before_train = calculate_perplexity(current_model, tokenizer, max_samples=20)

# Re-point the shared trainer at the current model, then train for the same 50 steps.
trainer.model = current_model
current_model = trainer.train(dataset_loader, max_steps=50)

ppl_after_train = calculate_perplexity(current_model, tokenizer, max_samples=20)
response_after_train = test_model_quality(current_model, tokenizer)

print(f"Perplexity after training: {ppl_after_train:.3f}")
print(f"Response after training: {response_after_train}")

# Log step 2
# `ppl_after_prune` comes from the preceding "Test after pruning" cell.
logger.log_step({"action": "prune", "step": step + 1, "layer": layer_idx, "ppl": ppl_after_prune})
logger.log_step({"action": "train", "step": step + 1, "layer": layer_idx, "ppl": ppl_after_train})

  super().__init__(*args, **kwargs)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
0,5.795442
50,10.4607


Perplexity after training: 11.480
Response after training: What is the capital of France? Paris
What is the capital of Australia? Canberra
What is the capital of China? Beijing



## Step 3: Remove Third Layer

In [16]:
# Step 3
step = 2
# NOTE(review): as in step 2, the index is START_LAYER + step while two layers have
# already been removed; whether this addresses the intended original layer depends on
# how `_remove_layer` interprets the index — confirm.
layer_idx = START_LAYER + step

print(f"=== Step {step+1}: Removing layer {layer_idx} ===")

# Remove layer
# `current_model` is the model returned by the step 2 training call.
current_model = pruner._remove_layer(current_model, layer_idx)
layers_remaining = get_model_layers(current_model)

print(f"Layers remaining: {layers_remaining}")

=== Step 3: Removing layer 19 ===
Layers remaining: 29


In [17]:
# Test and train
# Whole step in one cell: measure after pruning, attach LoRA, train, measure again, log.
ppl_after_prune = calculate_perplexity(current_model, tokenizer, max_samples=20)
response_after_prune = test_model_quality(current_model, tokenizer)

# NOTE: unlike steps 1 and 2, `ppl_before_train` is not recomputed here, so that
# variable still holds the step 2 value after this cell.
current_model = pruner._apply_lora(current_model, layer_idx)
trainer.model = current_model
current_model = trainer.train(dataset_loader, max_steps=50)

ppl_after_train = calculate_perplexity(current_model, tokenizer, max_samples=20)
response_after_train = test_model_quality(current_model, tokenizer)

# Prints post-pruning perplexity followed by post-training perplexity for this step.
print(f"Perplexity: {ppl_after_prune:.3f} → {ppl_after_train:.3f}")
print(f"Final response: {response_after_train}")

# Log step 3
logger.log_step({"action": "prune", "step": step + 1, "layer": layer_idx, "ppl": ppl_after_prune})
logger.log_step({"action": "train", "step": step + 1, "layer": layer_idx, "ppl": ppl_after_train})

  super().__init__(*args, **kwargs)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
0,5.790966
50,10.4616


Perplexity: 13.532 → 12.162
Final response: What is the capital of France? Paris
What is the population of France? 8,645,000
What is the area


## Results Analysis

In [1]:
import matplotlib.pyplot as plt
import pandas as pd

# This cell depends on `logger` and `baseline_ppl` created by the earlier cells of the
# SAME kernel session; running it on a fresh kernel fails with a NameError on `logger`.
df = pd.DataFrame(logger.logs)

# Keep only the per-step rows; the baseline row has a different action and is dropped
# by these filters (the baseline value itself comes from `baseline_ppl`).
prune_data = df[df['action'] == 'prune'].reset_index(drop=True)
train_data = df[df['action'] == 'train'].reset_index(drop=True)
assert len(prune_data) == len(train_data), "Количество prune и train должно совпадать"

n_steps = len(prune_data)

# Explicit figure/axes instead of the implicit pyplot "current axes" state
fig, ax = plt.subplots(figsize=(12, 6))

# Plot baseline
# x_vals / y_vals accumulate the whole trajectory: baseline, prune 1, train 1, prune 2, ...
x_vals = [0]
y_vals = [baseline_ppl]
ax.plot(x_vals, y_vals, 'bo', markersize=8, label=f'Baseline ({baseline_ppl:.3f})')

# `step_idx` (not `i`) so the notebook-global GPU index `i` from the first cell
# is not overwritten by this loop.
for step_idx in range(n_steps):
    x_prev, x_prune, x_train = step_idx, step_idx + 0.5, step_idx + 1
    # Arrow origin: the baseline for the first step, otherwise the previous
    # step's post-training perplexity.
    prev_ppl = y_vals[-1]
    # Only the first marker of each kind gets a legend label; later ones pass '' so
    # the legend is not repeated per step.
    is_first = step_idx == 0

    # Prune (degradation: red arrow from the previous point to the post-pruning point)
    prune_ppl = prune_data.iloc[step_idx]['ppl']
    ax.annotate('', xy=(x_prune, prune_ppl), xytext=(x_prev, prev_ppl),
                arrowprops=dict(arrowstyle='->', color='red', lw=2))
    ax.plot(x_prune, prune_ppl, 'ro', markersize=8, label='After Pruning' if is_first else '')
    ax.text(x_prune, prune_ppl + 0.2, f'{prune_ppl:.2f}', ha='center', fontsize=9)

    # Train (recovery: green arrow from the post-pruning point to the post-training point)
    train_ppl = train_data.iloc[step_idx]['ppl']
    ax.annotate('', xy=(x_train, train_ppl), xytext=(x_prune, prune_ppl),
                arrowprops=dict(arrowstyle='->', color='green', lw=2))
    ax.plot(x_train, train_ppl, 'go', markersize=8, label='After Training' if is_first else '')
    ax.text(x_train, train_ppl - 0.3, f'{train_ppl:.2f}', ha='center', fontsize=9)

    x_vals.extend([x_prune, x_train])
    y_vals.extend([prune_ppl, train_ppl])

# X axis: one tick per plotted point (baseline, each prune, each train)
tick_positions = [0] + [k + 0.5 for k in range(n_steps)] + [k + 1 for k in range(n_steps)]
tick_labels = (['Baseline']
               + [f'Prune {k+1}' for k in range(n_steps)]
               + [f'Train {k+1}' for k in range(n_steps)])
ax.set_xticks(tick_positions)
ax.set_xticklabels(tick_labels, rotation=45)

ax.set_xlabel('Pruning/Training Steps')
ax.set_ylabel('Perplexity')
ax.set_title('Iterative Pruning and Healing Process')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

print("\n=== DETAILED LOGS ===")
print(df)

NameError: name 'logger' is not defined