In [1]:
from transformers import AutoModelForCausalLM, OPTForCausalLM, AutoTokenizer,AutoModelForSeq2SeqLM,TrainingArguments,Trainer,DataCollatorForLanguageModeling,TextDataset
from peft import LoraConfig,TaskType,get_peft_model
from datasets import load_dataset
import json

In [2]:
# LoRA adapter configuration used to wrap the base causal language model.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,   # adapters are going to be trained, not only loaded
    r=16,                   # rank of the low-rank update matrices
    lora_alpha=32,          # LoRA scaling factor (applied relative to r)
    lora_dropout=0.1,
    # GPT-2 implements its attention projections as Conv1D layers, whose
    # weights are stored as (fan_in, fan_out). State that explicitly instead
    # of relying on PEFT to detect and correct it with a warning at wrap time.
    fan_in_fan_out=True,
)
# Hugging Face Hub id of the base checkpoint; shared by the model and tokenizer cells.
model_id = 'openai-community/gpt2'

In [3]:
# Load the pretrained base checkpoint and immediately wrap it with the LoRA
# adapters described by peft_config.
model = get_peft_model(
    AutoModelForCausalLM.from_pretrained(model_id),
    peft_config,
)
# Sanity check: print trainable vs. total parameter counts.
model.print_trainable_parameters()

trainable params: 589,824 || all params: 125,029,632 || trainable%: 0.4717473694555863




In [4]:
# named_modules() yields (qualified_name, module) tuples; print each one so
# the structure of the wrapped base model (including LoRA layers) is visible.
for named_module in model.get_base_model().named_modules():
    print(named_module)

('', GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): lora.Linear(
            (base_layer): Conv1D()
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.1, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=768, out_features=16, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=16, out_features=2304, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
          )
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
 

In [6]:
# Tokenizer loaded from the same hub id as the base model so vocabularies match.
tokenizer = AutoTokenizer.from_pretrained(model_id)


In [7]:
# Batch collator for language modeling; mlm=False turns masked-LM off, i.e.
# batches are prepared for causal (next-token) training.
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer,mlm=False)

In [8]:
# Hyperparameters and bookkeeping options for the Trainer run.
training_args = TrainingArguments(
    # Where checkpoints are written.
    output_dir='output/test',
    # Optimisation.
    learning_rate=1e-3,
    weight_decay=0.01,
    num_train_epochs=16,
    # Batching.
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # No evaluation pass; a checkpoint is saved at the end of every epoch.
    # NOTE: load_best_model_at_end is left disabled — with evaluation turned
    # off there is no metric to pick a "best" checkpoint by.
    evaluation_strategy='no',
    save_strategy='epoch',
)

In [9]:
def preprocess(input_path = 'intents.json'):
    """Flatten an intents JSON file into a single training-text string.

    The file must contain a top-level ``"intents"`` list whose items each
    have a ``"patterns"`` list and a ``"responses"`` list. For every pattern
    a ``User: <pattern>`` line is emitted, followed by one
    ``Assistant: <response>`` line for *each* response of the same intent.

    Args:
        input_path: Path of the JSON file to read.

    Returns:
        All generated lines concatenated into one string; every line ends
        with a newline character.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an expected key (``intents``, ``patterns``,
            ``responses``) is missing.
    """
    # JSON text is UTF-8; read it as such instead of depending on the
    # platform's default encoding (which breaks non-ASCII intents elsewhere).
    with open(input_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    lines = []
    for intent in data['intents']:
        for pattern in intent['patterns']:
            lines.append(f'User: {pattern}\n')
            # Every response of the intent is paired with every pattern.
            lines.extend(
                f'Assistant: {response}\n' for response in intent['responses']
            )
    return ''.join(lines)
def save_preprocess(data,output_path = 'data/data.txt'):
    """Write the preprocessed training text to ``output_path``.

    Missing parent directories are created first, so the call works on a
    fresh checkout where e.g. ``data/`` does not exist yet. An existing file
    is overwritten.

    Args:
        data: The text to write (as returned by ``preprocess``).
        output_path: Destination file path.
    """
    import os  # local import: the notebook's import cell does not provide os

    parent_dir = os.path.dirname(output_path)
    if parent_dir:  # empty when output_path has no directory component
        os.makedirs(parent_dir, exist_ok=True)
    # Write UTF-8 explicitly so the result does not depend on the platform's
    # default encoding.
    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(data)

In [10]:
# Build the training text from the default input ('intents.json') and write
# it to the default output path ('data/data.txt').
data = preprocess()
save_preprocess(data)

In [11]:
# Tokenize data/data.txt and cut it into fixed-size blocks for training.
train_dataset = TextDataset(
    tokenizer=tokenizer,
    file_path='data/data.txt',
    block_size=64,
    # TextDataset caches the tokenized features in a file next to data.txt and
    # silently reuses it on later runs. data.txt is regenerated by the
    # preprocessing cell, so always rebuild to avoid training on stale data.
    overwrite_cache=True,
)



In [12]:
# Wire the LoRA-wrapped model, hyperparameters, dataset and collator into a
# single Trainer instance.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collator,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
)

In [13]:
# Run fine-tuning; the returned TrainOutput (global_step, training_loss,
# metrics) is displayed as the cell result.
trainer.train()



  0%|          | 0/128 [00:00<?, ?it/s]

{'train_runtime': 81.8465, 'train_samples_per_second': 49.849, 'train_steps_per_second': 1.564, 'train_loss': 2.0827982425689697, 'epoch': 16.0}


TrainOutput(global_step=128, training_loss=2.0827982425689697, metrics={'train_runtime': 81.8465, 'train_samples_per_second': 49.849, 'train_steps_per_second': 1.564, 'train_loss': 2.0827982425689697, 'epoch': 16.0})

In [None]:
# Persist the fine-tuned (PEFT-wrapped) model into a directory literally
# named "output_dir", relative to the working directory.
# NOTE(review): this is not TrainingArguments.output_dir ('output/test') —
# confirm the literal path is intended and not a leftover placeholder.
# Presumably only the adapter weights/config are written for a PEFT model — TODO confirm.
model.save_pretrained("output_dir")