In [1]:
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, Trainer, TrainingArguments, DataCollatorForLanguageModeling

from transformers import BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, PeftModel, PeftConfig

import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


bin D:\.conda_env\yothalia\lib\site-packages\bitsandbytes\libbitsandbytes_cuda117.dll


## Install Model and Quantize

In [2]:
nf8_config = BitsAndBytesConfig(
   load_in_8bit=True,
   bnb_8bit_quant_type="nf8",
   bnb_8bit_use_double_quant=True,
   bnb_8bit_compute_dtype=torch.bfloat16
)

config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.02,
        target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj","lm_head"],
        bias="none",
        task_type="CAUSAL_LM",
    )

In [9]:
tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", 
                                          
                                          trust_remote_code=True,
                                          cache_dir='../yothalia/server/model_weights/internlm/internlm-chat-7b')

In [None]:
tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", 
                                          
                                          trust_remote_code=True,
                                          cache_dir='../yothalia/server/model_weights/internlm/internlm-chat-7b')
# Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and cause OOM Error.
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b",
                                             quantization_config=nf8_config,
                                             torch_dtype=torch.float16, 
                                             trust_remote_code=True,
                                             cache_dir='../yothalia/server/model_weights/internlm/internlm-chat-7b')

In [None]:
special_tokens_dict = {'additional_special_tokens': 
                       ['<<SYS>>','<</SYS>>','[INST]','[/INST]']}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
model.resize_token_embeddings(len(tokenizer))

In [None]:
tokenizer.save_pretrained('../yothalia/server/model_weights/internlm/internlm-chat-7b-finetune')

# Optionally, save the model as well
model.save_pretrained('../yothalia/server/model_weights/internlm/internlm-chat-7b-finetune')

In [None]:
model = get_peft_model(model, config)
model.print_trainable_parameters()

In [None]:
model.save_pretrained("../yothalia/server/model_weights/internlm/internlm-chat-7b-finetune-lora")

## Load Model and Adapter

In [5]:
model = AutoModelForCausalLM.from_pretrained("../yothalia/server/model_weights/internlm/internlm-chat-7b-finetune", 
                                                load_in_4bit= True,
                                                #peft_config=config,
                                                trust_remote_code=True)


Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 2/2 [00:44<00:00, 22.14s/it]


In [6]:
tokenizer = AutoTokenizer.from_pretrained("../yothalia/server/model_weights/internlm/internlm-chat-7b-finetune",
                                            trust_remote_code=True)

In [7]:
peft_model_id = "../yothalia/server/model_weights/internlm/internlm-chat-7b-finetune-lora"
config = PeftConfig.from_pretrained(peft_model_id)
model = PeftModel.from_pretrained(model, peft_model_id)
#model.add_adapter(config)

In [8]:
for name, param in model.named_parameters():
    if 'lora' in name:
        param.requires_grad = True

In [9]:
model.print_trainable_parameters()

trainable params: 41,693,248 || all params: 7,363,674,176 || trainable%: 0.5662016950164418


## Set Trainer

#### Load dataset

In [10]:
from torch.utils.data import DataLoader
import pandas as pd

In [46]:
df = pd.read_csv('../train_sample/csv/train.csv',index_col=0).reset_index(drop=True)
# shuffle data
#df = df.sample(frac=1).reset_index(drop=True)

In [48]:
df = df.map(lambda x: tokenizer(x, padding='max_length', truncation=True, max_length=512))


In [56]:
df

Unnamed: 0,train
0,"[input_ids, attention_mask]"
1,"[input_ids, attention_mask]"
2,"[input_ids, attention_mask]"
3,"[input_ids, attention_mask]"
4,"[input_ids, attention_mask]"
...,...
20017,"[input_ids, attention_mask]"
20018,"[input_ids, attention_mask]"
20019,"[input_ids, attention_mask]"
20020,"[input_ids, attention_mask]"


In [49]:
df_test = df[-200:].reset_index(drop=True)

In [50]:
df_train = df[:-200]

In [51]:
df_train

Unnamed: 0,train
0,"[input_ids, attention_mask]"
1,"[input_ids, attention_mask]"
2,"[input_ids, attention_mask]"
3,"[input_ids, attention_mask]"
4,"[input_ids, attention_mask]"
...,...
19817,"[input_ids, attention_mask]"
19818,"[input_ids, attention_mask]"
19819,"[input_ids, attention_mask]"
19820,"[input_ids, attention_mask]"


In [52]:
training_args = TrainingArguments(

  # Learning rate
  learning_rate=1.0e-5,

  # Number of training epochs
  num_train_epochs=3,

  # Max steps to train for (each step is a batch of data)
  # Overrides num_train_epochs, if not -1
  #max_steps=max_steps,

  # Batch size for training
  per_device_train_batch_size=4,

  # Directory to save model checkpoints
  output_dir='./ckp',

  # Other arguments
  overwrite_output_dir=False, # Overwrite the content of the output directory
  disable_tqdm=False, # Disable progress bars
  eval_steps=1, # Number of update steps between two evaluations
  save_steps=1, # After # steps model is saved
  warmup_steps=1, # Number of warmup steps for learning rate scheduler
  per_device_eval_batch_size=4, # Batch size for evaluation
  evaluation_strategy="steps",
  logging_strategy="steps",
  logging_steps=1,
  optim="adamw_torch",
  gradient_accumulation_steps = 4,
  gradient_checkpointing=False,

  # Parameters for early stopping
  load_best_model_at_end=True,
  save_total_limit=4,
  greater_is_better=False,
    
  # Parallel Training Param

)
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

In [53]:
print('Parallel Status:',training_args.parallel_mode)

Parallel Status: ParallelMode.NOT_PARALLEL


In [54]:
from transformers import Trainer
trainer = Trainer(
    model,
    training_args,
    train_dataset=df[0:100],
    eval_dataset=df[101:121],
    data_collator=data_collator,
)

In [55]:
trainer.train()

KeyError: 88

In [25]:
df["train"][100:120].reset_index(drop=True)

0     [input_ids, attention_mask]
1     [input_ids, attention_mask]
2     [input_ids, attention_mask]
3     [input_ids, attention_mask]
4     [input_ids, attention_mask]
5     [input_ids, attention_mask]
6     [input_ids, attention_mask]
7     [input_ids, attention_mask]
8     [input_ids, attention_mask]
9     [input_ids, attention_mask]
10    [input_ids, attention_mask]
11    [input_ids, attention_mask]
12    [input_ids, attention_mask]
13    [input_ids, attention_mask]
14    [input_ids, attention_mask]
15    [input_ids, attention_mask]
16    [input_ids, attention_mask]
17    [input_ids, attention_mask]
18    [input_ids, attention_mask]
19    [input_ids, attention_mask]
Name: train, dtype: object