In [1]:
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, Trainer, TrainingArguments, DataCollatorForLanguageModeling

from transformers import BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, PeftModel, PeftConfig

import pandas as pd

bin D:\.conda_env\yothalia\lib\site-packages\bitsandbytes\libbitsandbytes_cuda117.dll


## Load Model and Adapter

In [2]:
model = AutoModelForCausalLM.from_pretrained("../yothalia/server/model_weights/internlm/internlm-chat-7b-finetune-int8", 
                                                load_in_4bit=True,
                                                #peft_config=config,
                                                trust_remote_code=True)


In [3]:
tokenizer = AutoTokenizer.from_pretrained("../yothalia/server/model_weights/internlm/internlm-chat-7b-finetune-int8",
                                            trust_remote_code=True)

In [4]:
peft_model_id = "../yothalia/server/model_weights/internlm/internlm-chat-7b-finetune-lora"
config = PeftConfig.from_pretrained(peft_model_id)
model = PeftModel.from_pretrained(model, peft_model_id)
#model.add_adapter(config)

In [5]:
for name, param in model.named_parameters():
    if 'lora' in name:
        param.requires_grad = True

In [6]:
model.print_trainable_parameters()

trainable params: 41,693,248 || all params: 7,363,674,176 || trainable%: 0.5662016950164418


## Set Trainer

#### Load dataset

In [7]:
from torch.utils.data import DataLoader
import pandas as pd

In [8]:
df = pd.read_csv('../train_sample/csv/train_emoji_clean.csv',index_col=0)

In [9]:
df = df.map(lambda x: tokenizer(x, truncation=True)).reset_index(drop=True)


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [10]:
# shuffle data
#df = df.sample(frac=1).reset_index(drop=True)

In [11]:
df.head()

Unnamed: 0,train
0,"[input_ids, attention_mask]"
1,"[input_ids, attention_mask]"
2,"[input_ids, attention_mask]"
3,"[input_ids, attention_mask]"
4,"[input_ids, attention_mask]"


In [12]:
df_test = df[-200:-1].reset_index(drop=True)

In [14]:
df_train = df[:-200].reset_index(drop=True)

In [15]:
training_args = TrainingArguments(

  # Learning rate
  learning_rate=1.0e-5,

  # Number of training epochs
  num_train_epochs=3,

  # Max steps to train for (each step is a batch of data)
  # Overrides num_train_epochs, if not -1
  #max_steps=max_steps,

  # Batch size for training
  per_device_train_batch_size=4,

  # Directory to save model checkpoints
  output_dir='./ckp',

  # Other arguments
  overwrite_output_dir=False, # Overwrite the content of the output directory
  disable_tqdm=False, # Disable progress bars
  eval_steps=1, # Number of update steps between two evaluations
  save_steps=1, # After # steps model is saved
  warmup_steps=1, # Number of warmup steps for learning rate scheduler
  per_device_eval_batch_size=4, # Batch size for evaluation
  evaluation_strategy="steps",
  logging_strategy="steps",
  logging_steps=1,
  optim="adamw_torch",
  gradient_accumulation_steps = 4,
  gradient_checkpointing=False,

  # Parameters for early stopping
  load_best_model_at_end=True,
  save_total_limit=1,
  greater_is_better=False,
    
  # Parallel Training Param

)
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

In [16]:
print('Parallel Status:',training_args.parallel_mode)

Parallel Status: ParallelMode.NOT_PARALLEL


In [17]:
from transformers import Trainer
trainer = Trainer(
    model,
    training_args,
    train_dataset=df['train'],
    #eval_dataset=df['train'],
    data_collator=data_collator,
)

In [18]:
trainer.train()

KeyboardInterrupt: 

In [64]:
df.loc[20]

Unnamed: 0,train
20,"[input_ids, attention_mask]"
20,"[input_ids, attention_mask]"
20,"[input_ids, attention_mask]"
20,"[input_ids, attention_mask]"


In [66]:
df.reset_index(drop=True).loc[17896]

train    [input_ids, attention_mask]
Name: 17896, dtype: object

In [None]:
df["train"][100:120].reset_index(drop=True)