In [1]:
import os
import sys
import argparse
from typing import List

import torch
import transformers
from datasets import load_dataset, load_from_disk, DatasetDict

from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
    set_peft_model_state_dict,
)
from Prompter import Prompter, ZeroPrompter

device = "cuda" if torch.cuda.is_available() else "cpu"

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [8]:
####### PARAMS #######

MODEL_PRUNE = '../cache/jeffwan_llama_7b_hf_whitening_only_0.8.pt'
DATA_PATH = 'python_code_instructions_18k_alpaca_ru'
OUTPUT_DIR = '../cache/jeffwan_llama_7b_hf_whitening_only_0.8/'
LORA_R = 8 # Rank of lora 
LORA_ALPHA = 16
NUM_EPOCHS = 2 # Number of fine tune epochs
LEARNING_RATE = 1e-4
BATCH_SIZE = 64
MICRO_BATCH_SIZE = 4 # number of different forwards before accumulation grads in each batch 
CUTOFF_LENGTH = 400
TRAIN_ON_INPUTS = False # Should model learn on input text also or not
LORA_TARGET_MODULES = "q_v_proj,q_u_proj,k_v_proj,k_u_proj,v_u_proj,\
v_v_proj,o_u_proj,o_v_proj,gate_u_proj,gate_v_proj,down_u_proj,down_v_proj,up_u_proj,up_v_proj,\
q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj"
LORA_DROPOUT = 0.05
VAL_SIZE = 10
EVAL_STEPS = 0

In [5]:
from transformers import AutoModel, AutoTokenizer

model = AutoModel.from_pretrained('../cache/Witening_SVD_0.3_postrain/')
tokenizer = AutoTokenizer.from_pretrained('../cache/Witening_SVD_0.3_postrain_Tokenizer/')

Some weights of LlamaModel were not initialized from the model checkpoint at ../cache/Witening_SVD_0.3_postrain/ and are newly initialized: ['model.layers.17.mlp.gate_proj.weight', 'model.layers.31.self_attn.o_proj.weight', 'model.layers.30.self_attn.o_proj.weight', 'model.layers.14.mlp.up_proj.weight', 'model.layers.24.mlp.down_proj.weight', 'model.layers.10.mlp.down_proj.weight', 'model.layers.2.self_attn.k_proj.weight', 'model.layers.1.mlp.down_proj.weight', 'model.layers.24.self_attn.v_proj.weight', 'model.layers.29.self_attn.o_proj.weight', 'model.layers.20.mlp.down_proj.weight', 'model.layers.29.mlp.down_proj.weight', 'model.layers.14.self_attn.q_proj.weight', 'model.layers.20.self_attn.q_proj.weight', 'model.layers.4.mlp.down_proj.weight', 'model.layers.23.self_attn.o_proj.weight', 'model.layers.30.mlp.down_proj.weight', 'model.layers.14.mlp.gate_proj.weight', 'model.layers.22.mlp.down_proj.weight', 'model.layers.19.mlp.down_proj.weight', 'model.layers.31.mlp.gate_proj.weight', 

In [7]:
model 

LlamaModel(
  (embed_tokens): Embedding(32256, 4096)
  (layers): ModuleList(
    (0-31): 32 x LlamaDecoderLayer(
      (self_attn): LlamaAttention(
        (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
        (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
        (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
        (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        (rotary_emb): LlamaLinearScalingRotaryEmbedding()
      )
      (mlp): LlamaMLP(
        (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
        (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
        (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
        (act_fn): SiLUActivation()
      )
      (input_layernorm): LlamaRMSNorm()
      (post_attention_layernorm): LlamaRMSNorm()
    )
  )
  (norm): LlamaRMSNorm()
)

In [9]:
# Load Pruned Model

# pruned_dict = torch.load(MODEL_PRUNE, map_location='cpu')
# tokenizer, model = pruned_dict['tokenizer'], pruned_dict['model']
gradient_accumulation_steps = BATCH_SIZE // MICRO_BATCH_SIZE

prompter = Prompter('alpaca')

if device == 'cuda':
    model.half()

tokenizer.pad_token_id = 0
tokenizer.padding_side = "left"

def tokenize(prompt, add_eos_token=True):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LENGTH,
        padding=False,
        return_tensors=None,
    )
    if (
        result["input_ids"][-1] != tokenizer.eos_token_id
        and len(result["input_ids"]) < CUTOFF_LENGTH
        and add_eos_token
    ):
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)

    result["labels"] = result["input_ids"].copy()

    return result

def generate_and_tokenize_prompt(data_point):
    full_prompt = prompter.generate_prompt(
        data_point["instruction"],
        data_point["input"],
        data_point["output"],
    )
    tokenized_full_prompt = tokenize(full_prompt)
    if TRAIN_ON_INPUTS:
        user_prompt = prompter.generate_prompt(
            data_point["instruction"], data_point["input"]
        )
        tokenized_user_prompt = tokenize(
            user_prompt, add_eos_token=False
        )
        user_prompt_len = len(tokenized_user_prompt["input_ids"])

        if False:
            user_prompt_len -= 1

        tokenized_full_prompt["labels"] = [
            -100
        ] * user_prompt_len + tokenized_full_prompt["labels"][
            user_prompt_len:
        ]  # could be sped up, probably
    return tokenized_full_prompt

def split_and_tokenizer(test_data, tokenizer, seq_len, field_name):
    test_ids = tokenizer("\n\n".join(test_data[field_name]), return_tensors='pt').input_ids[0]
    test_ids_batch = []
    nsamples = test_ids.numel() // seq_len

    test_set = []
    for i in range(nsamples):
        batch = test_ids[(i * seq_len):((i + 1) * seq_len)]
        test_set.append({
            'input_ids': batch,
            'labels': batch
        })
    return test_set

# Prepare For LoRA
model = prepare_model_for_int8_training(model)
config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=LORA_TARGET_MODULES.split(","),
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
model.print_trainable_parameters()  

# Load Train Dataset
try:
    data = load_dataset(DATA_PATH)
except:
    data = DatasetDict({'train':load_from_disk(DATA_PATH)})
    
train_val = data["train"].train_test_split(
    test_size=VAL_SIZE, shuffle=True, seed=42
)
train_data = (
    train_val["train"].shuffle().map(generate_and_tokenize_prompt)
)
val_data = {
    DATA_PATH: train_val["test"].shuffle().map(generate_and_tokenize_prompt),
}

trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=gradient_accumulation_steps,
        warmup_steps=100,
        num_train_epochs=NUM_EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=10,
        logging_first_step=True,
        optim="adamw_torch",
        evaluation_strategy="steps",
        save_strategy="steps",
        eval_steps=EVAL_STEPS,
        save_steps=200,
        output_dir=OUTPUT_DIR,
        save_total_limit=20,
        load_best_model_at_end=True,
        ddp_find_unused_parameters=None,
        group_by_length=False,
        report_to="none",
        run_name="none",
        metric_for_best_model="{}_loss".format(DATA_PATH),
    ),
    data_collator=transformers.DataCollatorForSeq2Seq(
        tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
    ),
)
model.config.use_cache = False
old_state_dict = model.state_dict

trainer.train(resume_from_checkpoint=False)

model.state_dict = old_state_dict
model.save_pretrained(OUTPUT_DIR)

trainable params: 19,988,480 || all params: 6,628,380,672 || trainable%: 0.301559023072355


Map:   0%|          | 0/9990 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

comet_ml is installed but `COMET_API_KEY` is not set.
Detected kernel version 5.4.270, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


TypeError: LlamaModel.forward() got an unexpected keyword argument 'labels'