In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Restrict this kernel to GPU index 2. The assignment sits in a cell that runs before
# the cell importing torch.
# NOTE(review): presumably the variable must be set before CUDA is initialised to have
# any effect - confirm before moving this line further down.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

In [3]:
import os
import pandas as pd
import torch
from dataset_preprocessing import TokenInfo
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import itertools
import pandas as pd
from tqdm import tqdm

## Importances

In [4]:
def get_importances(data_dir="./new_importances_data"):
    """Load and merge every pickled importance mapping found in ``data_dir``.

    Each ``*.pkl`` file in the directory is unpickled and merged into a single
    dict with ``dict.update``. Files with any other extension are ignored.

    Args:
        data_dir: Directory holding the ``.pkl`` shards. Defaults to the path
            that used to be hard-coded, so existing zero-argument calls keep
            working.

    Returns:
        dict: Union of all shards. Shards are visited in sorted file-name
        order, so if two shards share a key the later file name wins.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the merge
    # (and the winner on duplicate keys) identical on every run.
    shard_names = sorted(
        name for name in os.listdir(data_dir) if name.endswith(".pkl")
    )
    importances = {}
    for shard_name in tqdm(shard_names):
        # SECURITY: unpickling can execute arbitrary code - only point this at
        # files that were produced locally by a trusted process.
        importances.update(pd.read_pickle(os.path.join(data_dir, shard_name)))
    return importances

In [5]:
# imps = get_importances()

In [6]:
def get_avg_imporances(importances):
    """Average the per-position importance tensors over all entries.

    Args:
        importances: Mapping whose values are sequences of tensors. The first
            value is used as the shape/dtype template, and every value is
            accumulated index-by-index into that template.

    Returns:
        list[torch.Tensor]: One tensor per position of the first value,
        holding the arithmetic mean over all entries of ``importances``.
        The input tensors are not modified.

    Raises:
        ValueError: If ``importances`` is empty (there is nothing to average
            and no template to size the result from).
    """
    if not importances:
        raise ValueError("importances is empty; nothing to average")

    n_entries = len(importances)
    # Take the template from the first entry without copying every value.
    first_entry = next(iter(importances.values()))
    avg_imps = [torch.zeros_like(imp) for imp in first_entry]
    for imps in tqdm(importances.values()):
        for i, layer_imps in enumerate(imps):
            avg_imps[i] += layer_imps / n_entries
    # TODO think harder about averaging method
    return avg_imps

In [7]:
# avg_importances = get_avg_imporances(imps)

In [8]:
# pd.to_pickle(avg_importances, "./avg_importances.pkl")

In [9]:
# Cache guard: reuse the pickled averages when they exist, otherwise recompute them
# from the raw importance shards and store the result. This keeps the notebook
# runnable from a fresh kernel without un-commenting the cells above.
AVG_IMPORTANCES_PATH = "./avg_importances.pkl"
if os.path.exists(AVG_IMPORTANCES_PATH):
    # SECURITY: unpickling can execute arbitrary code - only load files produced locally.
    avg_importances = pd.read_pickle(AVG_IMPORTANCES_PATH)
else:
    avg_importances = get_avg_imporances(get_importances())
    pd.to_pickle(avg_importances, AVG_IMPORTANCES_PATH)

In [10]:
len(avg_importances)

24

## Model

In [11]:
# Identifier of the base checkpoint; passed to the `from_pretrained` calls below.
model_id = "microsoft/phi-1_5"
# Commit hash passed as `revision=` when the model weights are loaded, so the run
# does not silently pick up later changes to the repository.
model_revision = "349cf8b5e81fd5f791d1740da5de1313a0419bbd" # latest as of feb 1st

In [12]:
# Pin the tokenizer to the same commit as the model weights so that both come from
# one snapshot of the repository (previously only the model was pinned).
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    revision=model_revision,
    trust_remote_code=True,
)

In [13]:
vocab = tokenizer.get_vocab()
len(vocab)

50295

In [14]:
# tokenizer.decode(token_info.get_prefixes(top_tokens[1000][0], 9, 10)[0])

In [15]:
# Load the causal LM at the pinned commit. No dtype or attention implementation is
# passed, so the library defaults apply.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=model_revision,
    trust_remote_code=True,
    # Options that were considered but are deliberately left disabled.
    # NOTE(review): half precision would change the numerics - be careful with this.
    # torch_dtype=torch.float16,
    # attn_implementation="flash_attention_2",
)

## Prune Model

In [16]:
from prunners import prune_mlps_holistically
from importances import get_mlps

In [17]:
mlps = get_mlps(model)

In [18]:
len(mlps), len(avg_importances)

(24, 24)

In [19]:
# Key each averaged importance tensor by the MLP module it belongs to.
# This cell rebinds `avg_importances` from a list to a dict, so it is guarded against
# re-execution: zipping a second time would pair the modules with the dict's own keys.
if not isinstance(avg_importances, dict):
    # zip() truncates silently, so fail loudly if the two sequences do not line up.
    if len(mlps) != len(avg_importances):
        raise ValueError(
            f"expected one importance tensor per MLP, got {len(avg_importances)} "
            f"tensors for {len(mlps)} MLPs"
        )
    avg_importances = dict(zip(mlps, avg_importances))

In [20]:
# Prune the MLPs using the averaged importances. The 0.2 argument matches the "0.2"
# ratio that appears in the output file names at the end of the notebook.
# NOTE(review): the model printed afterwards shows fc1 with out_features=6633, so this
# call appears to shrink the modules in place - if so, re-running the cell would prune
# a second time. Confirm, and reload the model before re-running.
prune_mlps_holistically(avg_importances, 0.2)

In [23]:
model

PhiForCausalLM(
  (model): PhiModel(
    (embed_tokens): Embedding(51200, 2048)
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0): PhiDecoderLayer(
        (self_attn): PhiAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (k_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (v_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (dense): Linear(in_features=2048, out_features=2048, bias=True)
          (rotary_emb): PhiRotaryEmbedding()
        )
        (mlp): PhiMLP(
          (activation_fn): NewGELUActivation()
          (fc1): Linear(in_features=2048, out_features=6633, bias=True)
          (fc2): Linear(in_features=6633, out_features=2048, bias=True)
        )
        (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (resid_dropout): Dropout(p=0.0, inplace=False)
      )
      (1): PhiDecoderLayer(
        (self_attn): PhiAtten

## Train model

In [24]:
from peft import LoraConfig, PeftConfig
import transformers

In [25]:
from post_training import get_lora_config, get_training_arguments
from dataset import get_baseline_dataset
from trl import SFTTrainer



In [26]:
lora_config = get_lora_config()
training_arguments = get_training_arguments("./tmp")

In [27]:
training_arguments.save_steps = 400

In [28]:
model.cuda();

In [29]:
# Model-side settings applied before the model is handed to the trainer.
# NOTE(review): use_cache is presumably disabled because it conflicts with gradient
# checkpointing - confirm.
model.config.use_cache = False
# NOTE(review): unclear whether this model's config reads `pretraining_tp` - confirm or drop.
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

In [30]:
dataset = get_baseline_dataset()
train_data, eval_data = dataset["train"], dataset["test"]

reading pickle


In [31]:
# Reuse the end-of-sequence token as the padding token; this is set before the trainer
# is constructed.
# NOTE(review): presumably needed because the tokenizer ships without a pad token - confirm.
tokenizer.pad_token = tokenizer.eos_token

In [32]:
# Turn checkpoint saving off for this run.
# NOTE(review): an earlier cell sets `training_arguments.save_steps = 400`; with the
# strategy set to "no" that value presumably has no effect - keep only one of the two.
training_arguments.save_strategy="no"

In [33]:
# Build the supervised fine-tuning trainer around the pruned model. The LoRA config is
# passed as `peft_config`, and training/eval use the "train"/"test" splits loaded above.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=lora_config,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
    # Column of the dataset that holds the raw training text.
    dataset_text_field="text",
    max_seq_length=1024, # tweak this
    # TODO: think harder about the datacollator
    # Candidate collator, currently disabled so the trainer's default is used:
    # data_collator=transformers.DataCollatorForSeq2Seq(
    #     tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
    # ),
)

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [34]:
trainer.evaluate()

You're using a CodeGenTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'eval_loss': 3.494065761566162,
 'eval_runtime': 169.1167,
 'eval_samples_per_second': 11.826,
 'eval_steps_per_second': 1.478}

In [35]:
train_res = trainer.train()

Step,Training Loss,Validation Loss
100,3.2392,3.195374
200,3.1561,3.144388
300,3.1693,3.128358
400,3.0862,3.120554
500,3.0676,3.115312
600,3.1424,3.111253
700,3.0915,3.108431
800,3.1582,3.105942


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [36]:
pd.to_pickle(trainer.state, "./tmp/trainer_state_llm_pruner_style_0.2ratio_holistic.pkl")

In [46]:
# Show only the log rows that carry an evaluation loss (one per evaluation step).
trainer_state = trainer.state
(
    pd.DataFrame(trainer_state.log_history)
    .dropna(subset=["eval_loss"])
)

Unnamed: 0,loss,learning_rate,epoch,step,eval_loss,eval_runtime,eval_samples_per_second,eval_steps_per_second,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss
11,,,0.12,100,3.195374,170.2413,11.748,1.469,,,,,
22,,,0.24,200,3.144388,170.1082,11.757,1.47,,,,,
33,,,0.36,300,3.128358,170.1579,11.754,1.469,,,,,
44,,,0.48,400,3.120554,170.0755,11.759,1.47,,,,,
55,,,0.6,500,3.115312,170.1386,11.755,1.469,,,,,
66,,,0.72,600,3.111253,170.0991,11.758,1.47,,,,,
77,,,0.84,700,3.108431,170.1744,11.753,1.469,,,,,
88,,,0.96,800,3.105942,170.1269,11.756,1.469,,,,,
99,,,1.08,900,3.104341,170.0521,11.761,1.47,,,,,
110,,,1.2,1000,3.10314,170.0689,11.76,1.47,,,,,


### Evaluation

In [38]:
from evaluation import evaluate_on_nlp_tasks

In [39]:
model.eval();

In [40]:
# Smaller evaluation run (limit=300) with autograd disabled; the model was switched
# to eval mode in the previous cell. A larger run with limit=1000 follows below.
with torch.no_grad():
    eval_res = evaluate_on_nlp_tasks(model, tokenizer, limit=300)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
100%|█| 3000/3000 [01:31<00:00, 
fatal: not a git repository (or any parent up to mount point /)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).


In [41]:
eval_res["results"]

{'hellaswag': {'acc,none': 0.45,
  'acc_norm,none': 0.5233333333333333,
  'alias': 'hellaswag'},
 'piqa': {'acc,none': 0.7333333333333333,
  'acc_norm,none': 0.7366666666666667,
  'alias': 'piqa'},
 'boolq': {'acc,none': 0.6, 'alias': 'boolq'},
 'winogrande': {'acc,none': 0.6733333333333333, 'alias': 'winogrande'}}

In [42]:
# Larger evaluation run (limit=1000). Wrapped in no_grad like the limit=300 run above
# so that no autograd graph is built during inference.
with torch.no_grad():
    eval_res = evaluate_on_nlp_tasks(model, tokenizer, limit=1000)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
100%|█| 10000/10000 [05:04<00:00
fatal: not a git repository (or any parent up to mount point /)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).


In [43]:
eval_res["results"]

{'hellaswag': {'acc,none': 0.428,
  'acc_norm,none': 0.531,
  'alias': 'hellaswag'},
 'piqa': {'acc,none': 0.742, 'acc_norm,none': 0.737, 'alias': 'piqa'},
 'boolq': {'acc,none': 0.622, 'alias': 'boolq'},
 'winogrande': {'acc,none': 0.655, 'alias': 'winogrande'}}

## Save

In [44]:
model.cpu();

In [45]:
# Persist the weights only (state_dict); the model was moved to the CPU in the previous cell.
# NOTE(review): the model was trained through SFTTrainer with a `peft_config`. If the
# LoRA layers were injected into `model` in place, this state_dict holds un-merged
# adapter keys rather than plain module weights - confirm before relying on it.
# NOTE(review): the pruned MLP shapes (e.g. fc1 out_features=6633 in the printout above)
# presumably require the same pruning to be re-applied before this file can be loaded.
torch.save(model.state_dict(), "./tmp/model_llm_prunner_style_0.2_ratio_holistic_state_dict")